Page MenuHomec4science

No OneTemporary

File Metadata

Created
Wed, Jun 25, 03:56
This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/invenio.wsgi b/invenio.wsgi
index 67c999b5c..aa47a61a6 100644
--- a/invenio.wsgi
+++ b/invenio.wsgi
@@ -1,76 +1,76 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
mod_wsgi Invenio application loader.
"""
from invenio import config
# Start remote debugger if appropriate:
try:
from invenio.remote_debugger_config import CFG_REMOTE_DEBUGGER_ENABLED, \
CFG_REMOTE_DEBUGGER_WSGI_LOADING
if CFG_REMOTE_DEBUGGER_ENABLED:
from invenio import remote_debugger
remote_debugger.start_file_changes_monitor()
if CFG_REMOTE_DEBUGGER_WSGI_LOADING:
remote_debugger.start()
except:
pass
# wrap warnings (usually from sql queries) to log the traceback
# of their origin for debugging
try:
- from invenio.errorlib import wrap_warn
+ from invenio.ext.logging import wrap_warn
wrap_warn()
except:
pass
# pre-load citation dictionaries upon WSGI application start-up (the
# citation dictionaries are loaded lazily, which is good for CLI
# processes such as bibsched, but for web user queries we want them to
# be available right after web server start-up):
try:
from invenio.bibrank_citation_searcher import get_citedby_hitset, \
get_refersto_hitset
get_citedby_hitset(None)
get_refersto_hitset(None)
except:
pass
## You can't write to stdout in mod_wsgi, but some of our
## dependecies do this! (e.g. 4Suite)
import sys
sys.stdout = sys.stderr
try:
from invenio.base.factory import create_app
application = create_app()
finally:
## mod_wsgi uses one thread to import the .wsgi file
## and a second one to instantiate the application.
## Therefore we need to close redundant conenctions that
## are allocated on the 1st thread.
from invenio.dbquery import close_connection
close_connection()
if 'werkzeug-debugger' in getattr(config, 'CFG_DEVEL_TOOLS', []):
from werkzeug.debug import DebuggedApplication
application = DebuggedApplication(application, evalex=True)
diff --git a/invenio/base/config.py b/invenio/base/config.py
index f7e1b28bb..118ae652d 100644
--- a/invenio/base/config.py
+++ b/invenio/base/config.py
@@ -1,613 +1,613 @@
# -*- coding: utf-8 -*-
import distutils.sysconfig
from os.path import join
EXTENSIONS = [
'invenio.ext.confighacks',
'invenio.ext.jinja2hacks',
'invenio.ext.debug_toolbar',
'invenio.ext.sqlalchemy',
'invenio.ext.sslify',
'invenio.ext.cache',
'invenio.ext.session',
'invenio.ext.login',
'invenio.ext.principal',
'invenio.ext.email',
'invenio.ext.legacy',
'invenio.ext.assets',
'invenio.ext.template',
'invenio.ext.admin',
- 'invenio.ext.logger',
+ 'invenio.ext.logging',
'invenio.ext.gravatar',
'invenio.ext.menu:MenuAlchemy',
'invenio.ext.breadcrumb:BreadcrumbAlchemy',
'invenio.modules.deposit.url_converters',
]
PACKAGES = [
'invenio.modules.*'
]
CFG_PREFIX = distutils.sysconfig.get_config_var("prefix")
CFG_BATCHUPLOADER_DAEMON_DIR = join(CFG_PREFIX, "var/batchupload")
CFG_BIBDOCFILE_FILEDIR = join(CFG_PREFIX, "var/data/files")
CFG_BINDIR = join(CFG_PREFIX, "bin")
CFG_CACHEDIR = join(CFG_PREFIX, "var/cache")
CFG_ETCDIR = join(CFG_PREFIX, "etc")
CFG_LOCALEDIR = join(CFG_PREFIX, "share/locale")
CFG_LOGDIR = join(CFG_PREFIX, "var/log")
CFG_PYLIBDIR = join(CFG_PREFIX, "lib/python")
CFG_TMPDIR = join(CFG_PREFIX, "var/tmp")
CFG_TMPSHAREDDIR = join(CFG_PREFIX, "var/tmp-shared")
CFG_WEBDIR = join(CFG_PREFIX, "var/www")
CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR = join(CFG_PREFIX, "etc/bibconvert/config")
CFG_WEBSUBMIT_COUNTERSDIR = join(CFG_PREFIX, "var/data/submit/counters")
CFG_WEBSUBMIT_STORAGEDIR = join(CFG_PREFIX, "var/data/submit/storage")
#FIXME check the usage and replace by SQLALCHEMY_URL
CFG_DATABASE_HOST = "localhost"
CFG_DATABASE_NAME = "invenio"
CFG_DATABASE_PASS = "my123p$ss"
CFG_DATABASE_PORT = 3306
CFG_DATABASE_SLAVE = None
CFG_DATABASE_TYPE = "mysql"
CFG_DATABASE_USER = "invenio"
# CFG_FLASK_CACHE_TYPE has been deprecated.
CACHE_TYPE = "redis"
# DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED
# FROM INVENIO.CONF BY EXECUTING:
# inveniocfg --update-all
CFG_SITE_NAME_INTL = {}
CFG_SITE_NAME_INTL['af'] = "Atlantis Instituut van Fiktiewe Wetenskap"
CFG_SITE_NAME_INTL['ar'] = "معهد أطلنطيس للعلوم الافتراضية"
CFG_SITE_NAME_INTL['bg'] = "Институт за фиктивни науки Атлантис"
CFG_SITE_NAME_INTL['ca'] = "Institut Atlantis de Ciència Fictícia"
CFG_SITE_NAME_INTL['cs'] = "Atlantis Institut Fiktivních Věd"
CFG_SITE_NAME_INTL['de'] = "Atlantis Institut der fiktiven Wissenschaft"
CFG_SITE_NAME_INTL['el'] = "Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος"
CFG_SITE_NAME_INTL['en'] = "Atlantis Institute of Fictive Science"
CFG_SITE_NAME_INTL['es'] = "Atlantis Instituto de la Ciencia Fictive"
CFG_SITE_NAME_INTL['fr'] = "Atlantis Institut des Sciences Fictives"
CFG_SITE_NAME_INTL['hr'] = "Institut Fiktivnih Znanosti Atlantis"
CFG_SITE_NAME_INTL['gl'] = "Instituto Atlantis de Ciencia Fictive"
CFG_SITE_NAME_INTL['ka'] = "ატლანტიდის ფიქტიური მეცნიერების ინსტიტუტი"
CFG_SITE_NAME_INTL['it'] = "Atlantis Istituto di Scienza Fittizia"
CFG_SITE_NAME_INTL['rw'] = "Atlantis Ishuri Rikuru Ry'ubuhanga"
CFG_SITE_NAME_INTL['lt'] = "Fiktyvių Mokslų Institutas Atlantis"
CFG_SITE_NAME_INTL['hu'] = "Kitalált Tudományok Atlantiszi Intézete"
CFG_SITE_NAME_INTL['ja'] = "Fictive 科学のAtlantis の協会"
CFG_SITE_NAME_INTL['no'] = "Atlantis Institutt for Fiktiv Vitenskap"
CFG_SITE_NAME_INTL['pl'] = "Instytut Fikcyjnej Nauki Atlantis"
CFG_SITE_NAME_INTL['pt'] = "Instituto Atlantis de Ciência Fictícia"
CFG_SITE_NAME_INTL['ro'] = "Institutul Atlantis al Ştiinţelor Fictive"
CFG_SITE_NAME_INTL['ru'] = "Институт Фиктивных Наук Атлантиды"
CFG_SITE_NAME_INTL['sk'] = "Atlantis Inštitút Fiktívnych Vied"
CFG_SITE_NAME_INTL['sv'] = "Atlantis Institut för Fiktiv Vetenskap"
CFG_SITE_NAME_INTL['uk'] = "Інститут вигаданих наук в Атлантісі"
CFG_SITE_NAME_INTL['zh_CN'] = "阿特兰提斯虚拟科学学院"
CFG_SITE_NAME_INTL['zh_TW'] = "阿特蘭提斯虛擬科學學院"
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0
CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0
CFG_ACCESS_CONTROL_LEVEL_SITE = 0
CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ""
CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1
CFG_ADS_SITE = 0
CFG_APACHE_GROUP_FILE = "demo-site-apache-user-groups"
CFG_APACHE_PASSWORD_FILE = "demo-site-apache-user-passwords"
CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY = ['reportnumber', 'recid', ]
CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS = r"invenio_webupload|Invenio-.*"
CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = {
'127.0.0.1': ['*'], # useful for testing
'127.0.1.1': ['*'], # useful for testing
'10.0.0.1': ['BOOK', 'REPORT'], # Example 1
'10.0.0.2': ['POETRY', 'PREPRINT'], # Example 2
}
CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL = "info@invenio-software.org"
CFG_BIBAUTHORID_ENABLED = True
CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY = []
CFG_BIBAUTHORID_MAX_PROCESSES = 12
CFG_BIBAUTHORID_ON_AUTHORPAGES = True
CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS = 12
CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = False
CFG_BIBCATALOG_SYSTEM = "EMAIL"
CFG_BIBCATALOG_SYSTEM_EMAIL_ADDRESS = "info@invenio-software.org"
CFG_BIBCATALOG_SYSTEM_RT_CLI = "/usr/bin/rt"
CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD = ""
CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER = ""
CFG_BIBCATALOG_SYSTEM_RT_URL = "http://localhost/rt3"
CFG_BIBCIRCULATION_ACQ_STATUS_CANCELLED = "cancelled"
CFG_BIBCIRCULATION_ACQ_STATUS_NEW = "new"
CFG_BIBCIRCULATION_ACQ_STATUS_ON_ORDER = "on order"
CFG_BIBCIRCULATION_ACQ_STATUS_PARTIAL_RECEIPT = "partial receipt"
CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED = "received"
CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY = ""
CFG_BIBCIRCULATION_ILL_STATUS_CANCELLED = "cancelled"
CFG_BIBCIRCULATION_ILL_STATUS_NEW = "new"
CFG_BIBCIRCULATION_ILL_STATUS_ON_LOAN = "on loan"
CFG_BIBCIRCULATION_ILL_STATUS_RECEIVED = "received"
CFG_BIBCIRCULATION_ILL_STATUS_REQUESTED = "requested"
CFG_BIBCIRCULATION_ILL_STATUS_RETURNED = "returned"
CFG_BIBCIRCULATION_ITEM_STATUS_CANCELLED = "cancelled"
CFG_BIBCIRCULATION_ITEM_STATUS_CLAIMED = "claimed"
CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS = "in process"
CFG_BIBCIRCULATION_ITEM_STATUS_NOT_ARRIVED = "not arrived"
CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN = "on loan"
CFG_BIBCIRCULATION_ITEM_STATUS_ON_ORDER = "on order"
CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF = "on shelf"
CFG_BIBCIRCULATION_ITEM_STATUS_OPTIONAL = []
CFG_BIBCIRCULATION_ITEM_STATUS_UNDER_REVIEW = "under review"
CFG_BIBCIRCULATION_LIBRARY_TYPE_EXTERNAL = "external"
CFG_BIBCIRCULATION_LIBRARY_TYPE_HIDDEN = "hidden"
CFG_BIBCIRCULATION_LIBRARY_TYPE_INTERNAL = "internal"
CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN = "main"
CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED = "expired"
CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN = "on loan"
CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED = "returned"
CFG_BIBCIRCULATION_PROPOSAL_STATUS_NEW = "proposal-new"
CFG_BIBCIRCULATION_PROPOSAL_STATUS_ON_ORDER = "proposal-on order"
CFG_BIBCIRCULATION_PROPOSAL_STATUS_PUT_ASIDE = "proposal-put aside"
CFG_BIBCIRCULATION_PROPOSAL_STATUS_RECEIVED = "proposal-received"
CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED = "cancelled"
CFG_BIBCIRCULATION_REQUEST_STATUS_DONE = "done"
CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING = "pending"
CFG_BIBCIRCULATION_REQUEST_STATUS_PROPOSED = "proposed"
CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING = "waiting"
CFG_BIBCLASSIFY_WEB_MAXKW = 100
CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS = ['hpg', 'link', 'lis', 'llb', 'mat', 'mpp', 'msg', 'docx', 'docm', 'xlsx', 'xlsm', 'xlsb', 'pptx', 'pptm', 'ppsx', 'ppsm', ]
CFG_BIBDOCFILE_ADDITIONAL_KNOWN_MIMETYPES = {
"application/xml-dtd": ".dtd",
}
CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM = ('txt', 'html', 'xml', 'odt', 'doc', 'docx', 'djvu', 'pdf', 'ps', 'ps.gz')
CFG_BIBDOCFILE_DESIRED_CONVERSIONS = {
'pdf' : ('pdf;pdfa', ),
'ps.gz' : ('pdf;pdfa', ),
'djvu' : ('pdf', ),
'sxw': ('doc', 'odt', 'pdf;pdfa', ),
'docx' : ('doc', 'odt', 'pdf;pdfa', ),
'doc' : ('odt', 'pdf;pdfa', 'docx'),
'rtf' : ('pdf;pdfa', 'odt', ),
'odt' : ('pdf;pdfa', 'doc', ),
'pptx' : ('ppt', 'odp', 'pdf;pdfa', ),
'ppt' : ('odp', 'pdf;pdfa', 'pptx'),
'sxi': ('odp', 'pdf;pdfa', ),
'odp' : ('pdf;pdfa', 'ppt', ),
'xlsx' : ('xls', 'ods', 'csv'),
'xls' : ('ods', 'csv'),
'ods' : ('xls', 'xlsx', 'csv'),
'sxc': ('xls', 'xlsx', 'csv'),
'tiff' : ('pdf;pdfa', ),
'tif' : ('pdf;pdfa', ),}
CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES = [
('Main', 'Main document'),
('LaTeX', 'LaTeX'),
('Source', 'Source'),
('Additional', 'Additional File'),
('Audio', 'Audio file'),
('Video', 'Video file'),
('Script', 'Script'),
('Data', 'Data'),
('Figure', 'Figure'),
('Schema', 'Schema'),
('Graph', 'Graph'),
('Image', 'Image'),
('Drawing', 'Drawing'),
('Slides', 'Slides')]
CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC = {
'can_revise_doctypes': ['*'],
'can_comment_doctypes': ['*'],
'can_describe_doctypes': ['*'],
'can_delete_doctypes': ['*'],
'can_keep_doctypes': ['*'],
'can_rename_doctypes': ['*'],
'can_add_format_to_doctypes': ['*'],
'can_restrict_doctypes': ['*'],
}
CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS = [
('', 'Public'),
('restricted', 'Restricted')]
CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE = 0
CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000
CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY = 0.1
CFG_BIBDOCFILE_USE_XSENDFILE = 0
CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS = ['100__u', '700__u', '701__u', '502__c', ]
CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE = { 'POETRY' : 'record_poem'}
CFG_BIBEDIT_KB_INSTITUTIONS = "InstitutionsCollection"
CFG_BIBEDIT_KB_SUBJECTS = "Subjects"
CFG_BIBEDIT_LOCKLEVEL = 3
CFG_BIBEDIT_PROTECTED_FIELDS = ""
CFG_BIBEDIT_QUEUE_CHECK_METHOD = "bibrecord"
CFG_BIBEDIT_TIMEOUT = 3600
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING = 20000
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME = "22:00-05:00"
CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING = 2000
CFG_BIBFIELD_MASTER_FORMATS = ['marc', ]
CFG_BIBFORMAT_ADDTHIS_ID = ""
CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS = []
CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = []
CFG_BIBFORMAT_HIDDEN_TAGS = ['595', ]
CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES = False
CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = r"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
CFG_BIBINDEX_CHARS_PUNCTUATION = r"[\.\,\:\;\?\!\"]"
CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 1
CFG_BIBINDEX_MIN_WORD_LENGTH = 0
CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE = "etc/bibrank/stopwords.kb"
CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES = r"scan-.*"
CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0
CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0
CFG_BIBINDEX_REMOVE_STOPWORDS = 0
CFG_BIBINDEX_SPLASH_PAGES = {
"http://documents\.cern\.ch/setlink\?.*": ".*",
"http://ilcagenda\.linearcollider\.org/subContributionDisplay\.py\?.*|http://ilcagenda\.linearcollider\.org/contributionDisplay\.py\?.*": "http://ilcagenda\.linearcollider\.org/getFile\.py/access\?.*|http://ilcagenda\.linearcollider\.org/materialDisplay\.py\?.*",
}
CFG_BIBINDEX_SYNONYM_KBRS = {
'global': ['INDEX-SYNONYM-TITLE', 'exact'],
'title': ['INDEX-SYNONYM-TITLE', 'exact'],
}
CFG_BIBINDEX_URLOPENER_PASSWORD = "mysuperpass"
CFG_BIBINDEX_URLOPENER_USERNAME = "mysuperuser"
CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT = 1
CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT = 0.65
CFG_BIBMATCH_FUZZY_WORDLIMITS = {
'100__a': 2,
'245__a': 4
}
CFG_BIBMATCH_LOCAL_SLEEPTIME = 0.0
CFG_BIBMATCH_MATCH_VALIDATION_RULESETS = [('default', [{ 'tags' : '245__%,242__%',
'threshold' : 0.8,
'compare_mode' : 'lazy',
'match_mode' : 'title',
'result_mode' : 'normal' },
{ 'tags' : '037__a,088__a',
'threshold' : 1.0,
'compare_mode' : 'lazy',
'match_mode' : 'identifier',
'result_mode' : 'final' },
{ 'tags' : '100__a,700__a',
'threshold' : 0.8,
'compare_mode' : 'normal',
'match_mode' : 'author',
'result_mode' : 'normal' },
{ 'tags' : '773__a',
'threshold' : 1.0,
'compare_mode' : 'lazy',
'match_mode' : 'title',
'result_mode' : 'normal' }]),
('980__ \$\$a(THESIS|Thesis)', [{ 'tags' : '100__a',
'threshold' : 0.8,
'compare_mode' : 'strict',
'match_mode' : 'author',
'result_mode' : 'normal' },
{ 'tags' : '700__a,701__a',
'threshold' : 1.0,
'compare_mode' : 'lazy',
'match_mode' : 'author',
'result_mode' : 'normal' },
{ 'tags' : '100__a,700__a',
'threshold' : 0.8,
'compare_mode' : 'ignored',
'match_mode' : 'author',
'result_mode' : 'normal' }]),
('260__', [{ 'tags' : '260__c',
'threshold' : 0.8,
'compare_mode' : 'lazy',
'match_mode' : 'date',
'result_mode' : 'normal' }]),
('0247_', [{ 'tags' : '0247_a',
'threshold' : 1.0,
'compare_mode' : 'lazy',
'match_mode' : 'identifier',
'result_mode' : 'final' }]),
('020__', [{ 'tags' : '020__a',
'threshold' : 1.0,
'compare_mode' : 'lazy',
'match_mode' : 'identifier',
'result_mode' : 'joker' }])
]
CFG_BIBMATCH_QUERY_TEMPLATES = {
'title' : '[title]',
'title-author' : '[title] [author]',
'reportnumber' : 'reportnumber:[reportnumber]'
}
CFG_BIBMATCH_REMOTE_SLEEPTIME = 2.0
CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT = 15
CFG_BIBRANK_SELFCITES_PRECOMPUTE = 0
CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID = 0
CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1
CFG_BIBRANK_SHOW_CITATION_LINKS = 1
CFG_BIBRANK_SHOW_CITATION_STATS = 1
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0
CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1
CFG_BIBRANK_SHOW_READING_STATS = 1
CFG_BIBSCHED_EDITOR = "/usr/bin/vim"
CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30
CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = ['bibupload', 'oairepositoryupdater', ]
CFG_BIBSCHED_GC_TASKS_TO_REMOVE = ['bibindex', 'bibreformat', 'webcoll', 'bibrank', 'inveniogc', ]
CFG_BIBSCHED_LOG_PAGER = "/usr/bin/less"
CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY = 500
CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1
CFG_BIBSCHED_NODE_TASKS = {}
CFG_BIBSCHED_PROCESS_USER = ""
CFG_BIBSCHED_REFRESHTIME = 5
CFG_BIBSORT_BUCKETS = 1
CFG_BIBUPLOAD_CONFLICTING_REVISION_TICKET_QUEUE = ""
CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = ['6531_9', ]
CFG_BIBUPLOAD_DELETE_FORMATS = ['hb', ]
CFG_BIBUPLOAD_DISABLE_RECORD_REVISIONS = 0
CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = "035__9"
CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = "035__a"
CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = "970__a"
CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [
('http(s)?://.*', {}),
]
CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = ['/tmp', '/home', ]
CFG_BIBUPLOAD_REFERENCE_TAG = "999"
CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1
CFG_BIBUPLOAD_STRONG_TAGS = ['964', ]
CFG_BIBWORKFLOW_WORKER = "worker_celery"
CFG_BROKER_URL = "amqp://guest@localhost:5672//"
CFG_CELERY_RESULT_BACKEND = "amqp"
CFG_CERN_SITE = 0
CFG_ORGANIZATION_IDENTIFIER = ""
CFG_CROSSREF_PASSWORD = ""
CFG_CROSSREF_USERNAME = ""
CFG_DEVEL_SITE = 0
CFG_DEVEL_TEST_DATABASE_ENGINES = {}
CFG_DEVEL_TOOLS = []
CFG_EMAIL_BACKEND = "flask.ext.email.backends.smtp.Mail"
CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400
CFG_FLASK_DISABLED_BLUEPRINTS = []
CFG_FLASK_SERVE_STATIC_FILES = 1
CFG_INSPIRE_SITE = 0
CFG_INTBITSET_ENABLE_SANITY_CHECKS = False
CFG_JSTESTDRIVER_PORT = 9876
CFG_MATHJAX_HOSTING = "local"
CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT = 300
CFG_MISCUTIL_SMTP_HOST = "localhost"
CFG_MISCUTIL_SMTP_PASS = ""
CFG_MISCUTIL_SMTP_PORT = 25
CFG_MISCUTIL_SMTP_TLS = False
CFG_MISCUTIL_SMTP_USER = ""
CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT = 10000
CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False
CFG_OAI_DELETED_POLICY = "persistent"
CFG_OAI_EXPIRE = 90000
CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN = True
CFG_OAI_FAILED_HARVESTING_STOP_QUEUE = 1
CFG_OAI_FRIENDS = ['http://cds.cern.ch/oai2d', 'http://openaire.cern.ch/oai2d', 'http://export.arxiv.org/oai2', ]
CFG_OAI_ID_FIELD = "909COo"
CFG_OAI_ID_PREFIX = "atlantis.cern.ch"
CFG_OAI_IDENTIFY_DESCRIPTION = """<description>
<eprints xmlns="http://www.openarchives.org/OAI/1.1/eprints"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/1.1/eprints
http://www.openarchives.org/OAI/1.1/eprints.xsd">
<content>
<URL>http://localhost</URL>
</content>
<metadataPolicy>
<text>Free and unlimited use by anybody with obligation to refer to original record</text>
</metadataPolicy>
<dataPolicy>
<text>Full content, i.e. preprints may not be harvested by robots</text>
</dataPolicy>
<submissionPolicy>
<text>Submission restricted. Submitted documents are subject of approval by OAI repository admins.</text>
</submissionPolicy>
</eprints>
</description>"""
CFG_OAI_LICENSE_FIELD = "540__"
CFG_OAI_LICENSE_PUBLISHER_SUBFIELD = "b"
CFG_OAI_LICENSE_TERMS_SUBFIELD = "a"
CFG_OAI_LICENSE_URI_SUBFIELD = "u"
CFG_OAI_LOAD = 500
CFG_OAI_METADATA_FORMATS = {
'marcxml': ('XOAIMARC', 'http://www.openarchives.org/OAI/1.1/dc.xsd', 'http://purl.org/dc/elements/1.1/'),
'oai_dc': ('XOAIDC', 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', 'http://www.loc.gov/MARC21/slim'),
}
CFG_OAI_PREVIOUS_SET_FIELD = "909COq"
CFG_OAI_PROVENANCE_ALTERED_SUBFIELD = "t"
CFG_OAI_PROVENANCE_BASEURL_SUBFIELD = "u"
CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD = "d"
CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD = "h"
CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD = "m"
CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD = "d"
CFG_OAI_RIGHTS_CONTACT_SUBFIELD = "e"
CFG_OAI_RIGHTS_DATE_SUBFIELD = "g"
CFG_OAI_RIGHTS_FIELD = "542__"
CFG_OAI_RIGHTS_HOLDER_SUBFIELD = "d"
CFG_OAI_RIGHTS_STATEMENT_SUBFIELD = "f"
CFG_OAI_RIGHTS_URI_SUBFIELD = "u"
CFG_OAI_SAMPLE_IDENTIFIER = "oai:atlantis.cern.ch:123"
CFG_OAI_SET_FIELD = "909COp"
CFG_OAI_SLEEP = 2
CFG_OAUTH1_CONFIGURATIONS = {}
CFG_OAUTH1_PROVIDERS = []
CFG_OAUTH2_CONFIGURATIONS = {}
CFG_OAUTH2_PROVIDERS = []
CFG_OPENAIRE_SITE = 0
CFG_OPENID_CONFIGURATIONS = {}
CFG_OPENID_PROVIDERS = []
CFG_OPENOFFICE_SERVER_HOST = "localhost"
CFG_OPENOFFICE_SERVER_PORT = 2002
CFG_OPENOFFICE_USER = "nobody"
CFG_PATH_ANY2DJVU = ""
CFG_PATH_CONVERT = "/usr/local/bin/convert"
CFG_PATH_DJVUPS = ""
CFG_PATH_DJVUTXT = ""
CFG_PATH_FFMPEG = ""
CFG_PATH_FFPROBE = ""
CFG_PATH_GFILE = "/usr/bin/file"
CFG_PATH_GS = "/usr/local/bin/gs"
CFG_PATH_GUNZIP = "/usr/bin/gunzip"
CFG_PATH_GZIP = "/usr/bin/gzip"
CFG_PATH_MD5SUM = ""
CFG_PATH_MEDIAINFO = ""
CFG_PATH_MYSQL = "/usr/local/bin/mysql"
CFG_PATH_OCROSCRIPT = ""
CFG_PATH_OPENOFFICE_PYTHON = "bin/python"
CFG_PATH_PAMFILE = "/usr/local/bin/pdftoppm"
CFG_PATH_PDF2PS = "/usr/local/bin/pdf2ps"
CFG_PATH_PDFINFO = "/usr/local/bin/pdfinfo"
CFG_PATH_PDFLATEX = "/usr/texbin/pdflatex"
CFG_PATH_PDFOPT = "/usr/local/bin/pdfopt"
CFG_PATH_PDFTK = ""
CFG_PATH_PDFTOPPM = "/usr/local/bin/pdftoppm"
CFG_PATH_PDFTOPS = "/usr/local/bin/pdftops"
CFG_PATH_PDFTOTEXT = "/usr/local/bin/pdftotext"
CFG_PATH_PHP = "/usr/bin/php"
CFG_PATH_PS2PDF = "/usr/local/bin/ps2pdf"
CFG_PATH_PSTOASCII = "/usr/local/bin/ps2ascii"
CFG_PATH_PSTOTEXT = ""
CFG_PATH_TAR = "/usr/bin/tar"
CFG_PATH_TIFF2PDF = "/usr/local/bin/tiff2pdf"
CFG_PATH_WGET = "/usr/local/bin/wget"
CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT = 750
CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT = 2
CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT = 75
CFG_PLOTEXTRACTOR_DISALLOWED_TEX = ['begin', 'end', 'section', 'includegraphics', 'caption', 'acknowledgements', ]
CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT = 2.0
CFG_PLOTEXTRACTOR_SOURCE_BASE_URL = "http://arxiv.org/"
CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER = "pdf/"
CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER = "e-print/"
CFG_REFEXTRACT_KBS_OVERRIDE = {}
CFG_REFEXTRACT_TICKET_QUEUE = None
CFG_SITE_ADMIN_EMAIL = "info@invenio-software.org"
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {}
CFG_SITE_LANG = "en"
CFG_SITE_LANGS = ['af', 'ar', 'bg', 'ca', 'cs', 'de', 'el', 'en', 'es', 'fr', 'hr', 'gl', 'ka', 'it', 'rw', 'lt', 'hu', 'ja', 'no', 'pl', 'pt', 'ro', 'ru', 'sk', 'sv', 'uk', 'zh_CN', 'zh_TW', ]
CFG_SITE_NAME = "Atlantis Institute of Fictive Science"
CFG_SITE_RECORD = "record"
SECRET_KEY = "change_me"
CFG_SITE_SECURE_URL = "http://localhost:4000"
CFG_SITE_SUPPORT_EMAIL = "info@invenio-software.org"
CFG_SITE_URL = "http://localhost:4000"
CFG_SOLR_URL = ""
CFG_VERSION = "1.1.2.1185-8a57d"
CFG_WEB_API_KEY_ALLOWED_URL = []
CFG_WEBALERT_ALERT_ENGINE_EMAIL = "info@invenio-software.org"
CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72
CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20
CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3
CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300
CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_BIBSCHED = 5
CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_LIVE = 7
CFG_WEBAUTHORPROFILE_MAX_AFF_LIST = 100
CFG_WEBAUTHORPROFILE_MAX_COAUTHOR_LIST = 100
CFG_WEBAUTHORPROFILE_MAX_COLLAB_LIST = 100
CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES = 10
CFG_WEBAUTHORPROFILE_MAX_KEYWORD_LIST = 100
CFG_WEBAUTHORPROFILE_USE_BIBAUTHORID = False
CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20
CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False
CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = "info@invenio-software.org"
CFG_WEBCOMMENT_ALLOW_COMMENTS = 1
CFG_WEBCOMMENT_ALLOW_REVIEWS = 1
CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0
CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION = 1
CFG_WEBCOMMENT_DEFAULT_MODERATOR = "info@invenio-software.org"
CFG_WEBCOMMENT_EMAIL_REPLIES_TO = {
'Articles': ['506__d', '506__m'],
}
CFG_WEBCOMMENT_MAX_ATTACHED_FILES = 5
CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE = 5242880
CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH = 1
CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5
CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1
CFG_WEBCOMMENT_RESTRICTION_DATAFIELD = {
'Articles': '5061_a',
'Pictures': '5061_a',
'Theses': '5061_a',
}
CFG_WEBCOMMENT_ROUND_DATAFIELD = {
'Articles': '562__c',
'Pictures': '562__c',
}
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20
CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS = 1
CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False
CFG_WEBDEPOSIT_MAX_UPLOAD_SIZE = 104857600
CFG_WEBDEPOSIT_UPLOAD_FOLDER = "var/tmp/webdeposit_uploads"
CFG_WEBLINKBACK_TRACKBACK_ENABLED = 0
CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60
CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30
CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000
CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30
CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3
CFG_WEBSEARCH_CALL_BIBFORMAT = 0
CFG_WEBSEARCH_CITESUMMARY_SELFCITES_THRESHOLD = 2000
CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1
CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS = 10
CFG_WEBSEARCH_DETAILED_META_FORMAT = "hdm"
CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS = 1
CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR = True
CFG_WEBSEARCH_ENABLE_OPENGRAPH = False
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5
CFG_WEBSEARCH_FIELDS_CONVERT = {}
CFG_WEBSEARCH_FULLTEXT_SNIPPETS = {
'': 4,
}
CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS = {
'': 100,
}
CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR = "native"
CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0
CFG_WEBSEARCH_INSTANT_BROWSE = 10
CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25
CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60
CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200
CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1
CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000
CFG_WEBSEARCH_PREV_NEXT_HIT_FOR_GUESTS = 1
CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT = 1000
CFG_WEBSEARCH_RSS_I18N_COLLECTIONS = []
CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000
CFG_WEBSEARCH_RSS_TTL = 360
CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 1
CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT = 600
CFG_WEBSEARCH_SHOW_COMMENT_COUNT = 1
CFG_WEBSEARCH_SHOW_REVIEW_COUNT = 1
CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40
CFG_WEBSEARCH_SPIRES_SYNTAX = 1
CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1
CFG_WEBSEARCH_SYNONYM_KBRS = {
'journal': ['SEARCH-SYNONYM-JOURNAL', 'leading_to_number'],
}
CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0
CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS = []
CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY = "ANY"
CFG_WEBSEARCH_WILDCARD_LIMIT = 50000
CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3
CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0
CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2
CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365
CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS = 0
CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10
CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3
CFG_WEBSTAT_BIBCIRCULATION_START_YEAR = ""
CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ""
CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ""
CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ""
CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ""
CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2
CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = ['404r', '400', '5*', '41*', ]
CFG_WEBSTYLE_HTTP_USE_COMPRESSION = 0
CFG_WEBSTYLE_REVERSE_PROXY_IPS = []
CFG_WEBSTYLE_TEMPLATE_SKIN = "default"
CFG_WEBSUBMIT_USE_MATHJAX = 0
CFG_XAPIAN_ENABLED = ""
CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE = 0
CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = [0, 1,]
# END OF GENERATED FILE
diff --git a/invenio/base/factory.py b/invenio/base/factory.py
index 4c19e764f..543fb8c50 100644
--- a/invenio/base/factory.py
+++ b/invenio/base/factory.py
@@ -1,217 +1,217 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
invenio.base.factory
--------------------
Implements application factory.
"""
-#from invenio.errorlib import register_exception
+#from invenio.ext.logging import register_exception
from .helpers import with_app_context, unicodifier
from .utils import collect_blueprints, register_extensions, \
register_configurations
from .wrappers import Flask
__all__ = ['create_app', 'with_app_context']
def create_app(**kwargs_config):
"""
Prepare WSGI Invenio application based on Flask.
Invenio consists of a new Flask application with legacy support for
the old WSGI legacy application and the old Python legacy
scripts (URLs to *.py files).
An incoming request is processed in the following manner:
* The Flask application first routes request via its URL routing
system (see LegacyAppMiddleware.__call__()).
* One route in the Flask system, will match Python legacy
scripts (see static_handler_with_legacy_publisher()).
* If the Flask application aborts the request with a 404 error, the request
is passed on to the WSGI legacy application (see page_not_found()). E.g.
either the Flask application did not find a route, or a view aborted the
request with a 404 error.
"""
## The Flask application instance
_app = Flask('.'.join(__name__.split('.')[0:2]),
## Static files are usually handled directly by the webserver (e.g. Apache)
## However in case WSGI is required to handle static files too (such
## as when running simple server), then this flag can be
## turned on (it is done automatically by wsgi_handler_test).
## We assume anything under '/' which is static to be server directly
## by the webserver from CFG_WEBDIR. In order to generate independent
## url for static files use func:`url_for('static', filename='test')`.
static_url_path='',
template_folder='templates',
instance_relative_config=True,
)
# Handle both url with and without trailing slashe by Flask.
# @blueprint.route('/test')
# @blueprint.route('/test/') -> not necessary when strict_slashes == False
_app.url_map.strict_slashes = False
# Load invenio.conf
_app.config.from_object('invenio.base.config')
try:
#print _app.instance_path
import os
os.makedirs(_app.instance_path)
except:
pass
# Load invenio.cfg
_app.config.from_pyfile('invenio.cfg', silent=True)
## Update application config from parameters.
_app.config.update(kwargs_config)
## Database was here.
## First check that you have all rights to logs
#from invenio.bibtask import check_running_process_user
#check_running_process_user()
#from invenio.base.i18n import language_list_long
def language_list_long():
return []
# Jinja2 hacks were here.
# See note on Jinja2 string decoding using ASCII codec instead of UTF8 in
# function documentation
# SECRET_KEY is needed by Flask Debug Toolbar
SECRET_KEY = _app.config.get('SECRET_KEY') or \
_app.config.get('CFG_SITE_SECRET_KEY', 'change_me')
if not SECRET_KEY or SECRET_KEY == 'change_me':
fill_secret_key = """
Set variable SECRET_KEY with random string in invenio.cfg.
You can use following commands:
$ %s
""" % ('inveniomanage config create secret-key', )
print fill_secret_key
#try:
# raise Exception(fill_secret_key)
#except Exception:
# #register_exception(alert_admin=True,
# # subject="Missing CFG_SITE_SECRET_KEY")
# raise Exception(fill_secret_key)
_app.config["SECRET_KEY"] = SECRET_KEY
# Register extendsions listed in invenio.cfg
register_extensions(_app)
# Extend application config with packages configuration.
register_configurations(_app)
# Debug toolbar was here
# Set email backend for Flask-Email plugin
# Mailutils were here
# SSLify was here
# Legacy was here
# Jinja2 Memcache Bytecode Cache was here.
# Jinja2 custom loader was here.
# SessionInterface was here.
## Set custom request class was here.
## ... and map certain common parameters
_app.config['CFG_LANGUAGE_LIST_LONG'] = [(lang, longname.decode('utf-8'))
for (lang, longname) in language_list_long()]
## Invenio is all using str objects. Let's change them to unicode
_app.config.update(unicodifier(dict(_app.config)))
from invenio.base import before_request_functions
before_request_functions.setup_app(_app)
# Cache was here
# Logging was here.
# Login manager was here.
# Main menu was here.
# Jinja2 extensions loading was here.
# Custom template filters were here.
# Gravatar bridge was here.
# Set the user language was here.
# Custom template filters loading was here.
def _invenio_blueprint_plugin_builder(plugin):
"""
Handy function to bridge pluginutils with (Invenio) blueprints.

Accepts a plugin module exposing either a ``blueprints`` iterable or a
single ``blueprint`` attribute, and returns the first ``flask.Blueprint``
found, or ``None`` (callers filter out ``None`` results).  Closure over
``_app`` for config lookup and logging.
"""
from flask import Blueprint
# A module may export several blueprints ('blueprints') or one
# ('blueprint'); normalise both cases to a candidate list.
if 'blueprints' in dir(plugin):
candidates = getattr(plugin, 'blueprints')
elif 'blueprint' in dir(plugin):
candidates = [getattr(plugin, 'blueprint')]
else:
candidates = []
for candidate in candidates:
if isinstance(candidate, Blueprint):
# A blueprint listed in CFG_FLASK_DISABLED_BLUEPRINTS is skipped;
# the bare ``return`` yields None so the caller drops this plugin.
if candidate.name in _app.config.get('CFG_FLASK_DISABLED_BLUEPRINTS', []):
_app.logger.info('%s is excluded by CFG_FLASK_DISABLED_BLUEPRINTS' % candidate.name)
return
# NOTE(review): only the FIRST Blueprint of a multi-blueprint module
# is registered -- confirm this is intended.
return candidate
# Reached only when no candidate was a Blueprint instance.
_app.logger.error('%s is not a valid blueprint plugin' % plugin.__name__)
## Let's load all the blueprints that are composing this Invenio instance
_BLUEPRINTS = [m for m in map(_invenio_blueprint_plugin_builder,
collect_blueprints(app=_app))
if m is not None]
## Let's attach all the blueprints
for plugin in _BLUEPRINTS:
_app.register_blueprint(plugin,
url_prefix=_app.config.get(
'BLUEPRINTS_URL_PREFIXES',
{}).get(plugin.name))
# Flask-Admin was here.
# NOTE(review): development-only endpoint that renders the 404 template at
# /testing -- presumably a leftover smoke test; confirm it should ship.
@_app.route('/testing')
def testing():
from flask import render_template
return render_template('404.html')
return _app
diff --git a/invenio/base/scripts/database.py b/invenio/base/scripts/database.py
index c8eea3936..cf914762a 100644
--- a/invenio/base/scripts/database.py
+++ b/invenio/base/scripts/database.py
@@ -1,376 +1,376 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import os
import sys
import shutil
import datetime
from pipes import quote
from flask import current_app
from invenio.ext.script import Manager, change_command_name, print_progress
manager = Manager(usage="Perform database operations")
# Shortcuts for manager options to keep code DRY.
option_yes_i_know = manager.option('--yes-i-know', action='store_true',
dest='yes_i_know', help='use with care!')
option_default_data = manager.option('--no-data', action='store_false',
dest='default_data',
help='do not populate tables with default data')
@manager.option('-u', '--user', dest='user', default="root")
@manager.option('-p', '--password', dest='password', default="")
@option_yes_i_know
def init(user='root', password='', yes_i_know=False):
"""Initializes database and user."""
from invenio.ext.sqlalchemy import db
from invenio.utils.text import wrap_text_in_a_box, wait_for_user
## Step 0: confirm deletion
wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables! Run first `inveniomanage database drop`."""))
## Step 1: drop database and recreate it
if db.engine.name == 'mysql':
#FIXME improve escaping
args = dict((k, str(v).replace('$', '\$'))
for (k, v) in current_app.config.iteritems()
if k.startswith('CFG_DATABASE'))
args = dict(zip(args, map(quote, args.values())))
prefix = ('{cmd} -u {user} --password={password} '
'-h {CFG_DATABASE_HOST} -P {CFG_DATABASE_PORT} ')
cmd_prefix = prefix.format(cmd='mysql', user=user, password=password,
**args)
cmd_admin_prefix = prefix.format(cmd='mysqladmin', user=user,
password=password,
**args)
cmds = [
cmd_prefix + '-e "DROP DATABASE IF EXISTS {CFG_DATABASE_NAME}"',
(cmd_prefix + '-e "CREATE DATABASE IF NOT EXISTS '
'{CFG_DATABASE_NAME} DEFAULT CHARACTER SET utf8 '
'COLLATE utf8_general_ci"'),
# Create user and grant access to database.
(cmd_prefix + '-e "GRANT ALL PRIVILEGES ON '
'{CFG_DATABASE_USER}.* TO {CFG_DATABASE_NAME}@localhost '
'IDENTIFIED BY {CFG_DATABASE_PASS}"'),
cmd_admin_prefix + 'flush-privileges'
]
for cmd in cmds:
cmd = cmd.format(**args)
print cmd
if os.system(cmd):
print "ERROR: failed execution of", cmd
sys.exit(1)
print '>>> Database has been installed.'
@option_yes_i_know
def drop(yes_i_know=False):
"""Drops database tables.

Destructive operation: removes every model table (in reverse dependency
order), destroys the registered custom events, and -- via a
``before_drop`` listener on the ``Bibdoc`` table -- deletes the record
document directories from the filesystem.  Always asks for interactive
confirmation first.
"""
print ">>> Going to drop tables and related data on filesystem ..."
from sqlalchemy import event
from invenio.utils.date import get_time_estimator
from invenio.utils.text import wrap_text_in_a_box, wait_for_user
from invenio.webstat import destroy_customevents
from invenio.legacy.inveniocfg import test_db_connection
from invenio.base.utils import autodiscover_models
from invenio.ext.sqlalchemy import db
from invenio.bibdocfile import _make_base_dir
## Step 0: confirm deletion
wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables and related data on filesystem!"""))
## Step 1: test database connection
test_db_connection()
# Importing the models populates db.metadata with every table definition;
# the returned iterator must be consumed for the imports to happen.
list(autodiscover_models())
## Step 2: disable foreign key checks
if db.engine.name == 'mysql':
db.engine.execute('SET FOREIGN_KEY_CHECKS=0;')
## Step 3: destroy associated data
try:
msg = destroy_customevents()
if msg:
print msg
except:
print "ERROR: Could not destroy customevents."
## FIXME: move to bibedit_model
# Filesystem cleanup hook: fired by SQLAlchemy just before the Bibdoc
# table is dropped, so the document directories are removed while the
# docids can still be queried.
def bibdoc_before_drop(target, connection_dummy, **kw_dummy):
print
print ">>> Going to remove records data..."
for (docid,) in db.session.query(target.c.id).all():
directory = _make_base_dir(docid)
if os.path.isdir(directory):
print ' >>> Removing files for docid =', docid
shutil.rmtree(directory)
db.session.commit()
print ">>> Data has been removed."
from invenio.modules.record_editor.models import Bibdoc
event.listen(Bibdoc.__table__, "before_drop", bibdoc_before_drop)
# Drop in reverse topological order so dependent tables go first.
tables = list(reversed(db.metadata.sorted_tables))
N = len(tables)
prefix = '>>> Dropping %d tables ...' % N
e = get_time_estimator(N)
dropped = 0
for i, table in enumerate(tables):
try:
print_progress(1.0 * i / N, prefix=prefix,
suffix=str(datetime.timedelta(seconds=e()[0])))
table.drop(bind=db.engine)
dropped += 1
except:
# Best-effort: a failed drop is reported but does not abort the run.
print '\r', '>>> problem with dropping table', table
print
if dropped == N:
print ">>> Tables dropped successfully."
else:
print "ERROR: not all tables were properly dropped."
print ">>> Dropped", dropped, 'out of', N
@option_default_data
def create(default_data=True):
"""Creates database tables from sqlalchemy models.

Creates every table known to ``db.metadata`` (in dependency order),
applies a post-create schema tweak to ``collection_field_fieldvalue``,
and finally populates the tables via :func:`populate`.

:param default_data: forwarded to :func:`populate`; when False no
    default data is loaded
"""
print ">>> Going to create tables..."
from sqlalchemy import event
from invenio.utils.date import get_time_estimator
from invenio.legacy.inveniocfg import test_db_connection
from invenio.base.utils import autodiscover_models
from invenio.ext.sqlalchemy import db
try:
test_db_connection()
except:
# Connection problems are reported but do not abort table creation.
- from invenio.errorlib import get_tracestack
+ from invenio.ext.logging import get_tracestack
print get_tracestack()
# Importing the models populates db.metadata with the table definitions.
list(autodiscover_models())
# Post-create hook: relax the primary key / index layout of
# collection_field_fieldvalue right after SQLAlchemy creates it.
def cfv_after_create(target, connection, **kw):
print
print ">>> Modifing table structure..."
from invenio.dbquery import run_sql
run_sql('ALTER TABLE collection_field_fieldvalue DROP PRIMARY KEY')
run_sql('ALTER TABLE collection_field_fieldvalue ADD INDEX id_collection(id_collection)')
run_sql('ALTER TABLE collection_field_fieldvalue CHANGE id_fieldvalue id_fieldvalue mediumint(9) unsigned')
#print run_sql('SHOW CREATE TABLE collection_field_fieldvalue')
from invenio.modules.search.models import CollectionFieldFieldvalue
event.listen(CollectionFieldFieldvalue.__table__, "after_create", cfv_after_create)
# Create in topological order so referenced tables exist first.
tables = db.metadata.sorted_tables
N = len(tables)
prefix = '>>> Creating %d tables ...' % N
e = get_time_estimator(N)
created = 0
for i, table in enumerate(tables):
try:
print_progress(1.0 * i / N, prefix=prefix,
suffix=str(datetime.timedelta(seconds=e()[0])))
table.create(bind=db.engine)
created += 1
except:
# Best-effort: a failed create is reported but does not abort the run.
print '\r', '>>> problem with creating table', table
print
if created == N:
print ">>> Tables created successfully."
else:
print "ERROR: not all tables were properly created."
print ">>> Created", created, 'out of', N
populate(default_data)
@option_yes_i_know
@option_default_data
def recreate(yes_i_know=False, default_data=True):
"""Recreates database tables (same as issuing 'drop' and then 'create').

Note: ``yes_i_know`` is accepted as a command-line flag, but the
interactive confirmation inside ``drop`` still applies.
"""
drop()
create(default_data)
@manager.command
def uri():
"""Prints SQLAlchemy database uri."""
# Local import: the app context only exists once the manager runs.
from flask import current_app
print current_app.config['SQLALCHEMY_DATABASE_URI']
def load_fixtures(packages=['invenio.modules.*'], truncate_tables_first=False):
"""Load fixture data into the database.

Pairs fixture classes with their models by naming convention: a fixture
named ``<Model>Data`` (found in the packages' ``fixtures`` modules) is
matched with the model class ``<Model>``.

:param packages: package patterns to scan for fixtures and models
:param truncate_tables_first: when True, TRUNCATE the target tables
    (plus two hard-coded collection tables) before inserting
"""
from invenio.base.utils import autodiscover_models, \
import_module_from_packages
from invenio.ext.sqlalchemy import db
from fixture import SQLAlchemyFixture
fixture_modules = list(import_module_from_packages('fixtures',
packages=packages))
model_modules = list(autodiscover_models())
# Every attribute ending in 'Data' inside a fixtures module is a fixture.
fixtures = dict((f, getattr(ff, f)) for ff in fixture_modules
for f in dir(ff) if f[-4:] == 'Data')
fixture_names = fixtures.keys()
# Map '<Model>Data' -> model class, keeping only models with a fixture.
models = dict((m+'Data', getattr(mm, m)) for mm in model_modules
for m in dir(mm) if m+'Data' in fixture_names)
dbfixture = SQLAlchemyFixture(env=models, engine=db.metadata.bind,
session=db.session)
data = dbfixture.data(*[f for (n, f) in fixtures.iteritems() if n in models])
if len(models) != len(fixtures):
# The symmetric difference shows fixtures without a model and vice versa.
print ">>> ERROR: There are", len(models), "tables and", len(fixtures), "fixtures."
print ">>>", set(fixture_names) ^ set(models.keys())
else:
print ">>> There are", len(models), "tables to be loaded."
if truncate_tables_first:
print ">>> Going to truncate following tables:",
print map(lambda t: t.__tablename__, models.values())
# NOTE(review): these two tables are truncated unconditionally here,
# presumably because they reference the fixture tables -- confirm.
db.session.execute("TRUNCATE %s" % ('collectionname', ))
db.session.execute("TRUNCATE %s" % ('collection_externalcollection', ))
for m in models.values():
db.session.execute("TRUNCATE %s" % (m.__tablename__, ))
db.session.commit()
data.setup()
db.session.commit()
@option_default_data
@manager.option('--truncate', action='store_true',
dest='truncate_tables_first', help='use with care!')
def populate(default_data=True, truncate_tables_first=False):
"""Populate database with default data.

Loads fixtures, resets site name / admin email / field names from the
configuration, initialises web access rights, and marks all known
upgrade recipes as already applied (fresh installs need no migration).

:param default_data: when False, do nothing
:param truncate_tables_first: forwarded to :func:`load_fixtures`
"""
from invenio.config import CFG_PREFIX
from invenio.base.scripts.config import get_conf
if not default_data:
print '>>> No data filled...'
return
print ">>> Going to fill tables..."
load_fixtures(truncate_tables_first=truncate_tables_first)
conf = get_conf()
from invenio.legacy.inveniocfg import cli_cmd_reset_sitename, \
cli_cmd_reset_siteadminemail, cli_cmd_reset_fieldnames
cli_cmd_reset_sitename(conf)
cli_cmd_reset_siteadminemail(conf)
cli_cmd_reset_fieldnames(conf)
for cmd in ["%s/bin/webaccessadmin -u admin -c -a" % CFG_PREFIX]:
if os.system(cmd):
print "ERROR: failed execution of", cmd
sys.exit(1)
# A freshly-created schema is already up to date: register every known
# upgrade as successful so the upgrader does not try to re-run them.
from invenio.modules.upgrader.engine import InvenioUpgrader
iu = InvenioUpgrader()
map(iu.register_success, iu.get_upgrades())
print ">>> Tables filled successfully."
def version():
""" Get running version of database driver."""
from invenio.ext.sqlalchemy import db
try:
# Version string exposed by the DBAPI module backing the engine.
return db.engine.dialect.dbapi.__version__
except:
# Fallback when the engine/DBAPI is unavailable: assume MySQLdb.
import MySQLdb
return MySQLdb.__version__
@manager.option('-v', '--verbose', action='store_true', dest='verbose',
help='Display more details (driver version).')
@change_command_name
def driver_info(verbose=False):
""" Get name of running database driver.

:param verbose: when True, append '==<version>' to the driver name
"""
from invenio.ext.sqlalchemy import db
try:
return db.engine.dialect.dbapi.__name__ + (('==' + version())
if verbose else '')
except:
# Fallback when the engine/DBAPI is unavailable: assume MySQLdb.
import MySQLdb
return MySQLdb.__name__ + (('==' + version()) if verbose else '')
@manager.option('-l', '--line-format', dest='line_format', default="%s: %s")
@manager.option('-s', '--separator', dest='separator', default="\n")
@change_command_name
def mysql_info(separator=None, line_format=None):
    """
    Detect and print MySQL details useful for debugging problems on various OS.

    :param separator: when not ``None``, return a single string with one
        ``line_format % (key, value)`` entry per variable, joined by this
        separator; otherwise return a ``dict`` of the variables
    :param line_format: ``%``-style format applied to each (key, value)
        pair; defaults to ``"%s: %s"``
    :raises Exception: if the configured database engine is not MySQL
    """
    from invenio.ext.sqlalchemy import db
    if db.engine.name != 'mysql':
        raise Exception('Database engine is not mysql.')
    from invenio.dbquery import run_sql
    # Only this whitelist of server variables is relevant for debugging
    # version / charset / collation problems; everything else returned by
    # the SHOW VARIABLES wildcards is ignored.  (An unreachable
    # ``if False:`` debug branch was removed from the original loop.)
    wanted = ('version',
              'character_set_client',
              'character_set_connection',
              'character_set_database',
              'character_set_results',
              'character_set_server',
              'character_set_system',
              'collation_connection',
              'collation_database',
              'collation_server')
    rows = run_sql("SHOW VARIABLES LIKE 'version%'") + \
        run_sql("SHOW VARIABLES LIKE 'charact%'") + \
        run_sql("SHOW VARIABLES LIKE 'collat%'")
    out = [(key, val) for key, val in rows if key in wanted]
    if separator is not None:
        if line_format is None:
            line_format = "%s: %s"
        return separator.join(map(lambda i: line_format % i, out))
    return dict(out)
def main():
    """Entry point: run the database manager against a fresh application."""
    from invenio.base.factory import create_app
    manager.app = create_app()
    manager.run()


if __name__ == '__main__':
    main()
diff --git a/invenio/ext/admin/__init__.py b/invenio/ext/admin/__init__.py
index defd37987..ad6a6ebba 100644
--- a/invenio/ext/admin/__init__.py
+++ b/invenio/ext/admin/__init__.py
@@ -1,92 +1,92 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Flask-Admin support in Invenio
------------------------------
Please see http://flask-admin.readthedocs.org/en/latest/quickstart/ prior to
reading this documentation, to understand how Flask-Admin works.
Flask admin allows you to easily create web administration interfaces for your
SQLAlchemy models. This extension takes care of using Blueprint as base class
for the admin views.
By default this extension will look for invenio.<name>_admin modules and call
the method register_admin(app, admin) in each module to allow to register its
administration views.
By default all view are restricted to super users only. This can be changed via
the acc_<action>_action class variables.
Usage example - create a file called <module>_admin.py::
from invenio.ext.admin import InvenioModelView
from invenio.ext.sqlalchemy import db
from invenio.<module>_models import MyModel
class MyModelAdmin(InvenioModelView):
acc_edit_action = 'cfgmymodel'
_can_create = False
_can_edit = True
_can_delete = False
# ... Flask-Admin options ...
# column_list = ( ... )
def __init__(self, model, session, **kwargs):
super(MyModelAdmin, self).__init__(model, session, **kwargs)
def register_admin(app, admin):
admin.add_view(MyModelAdmin(MyModel, db.session, name='My model',
category="My Category"))
"""
from flask.ext.admin import Admin
from .views import AdminIndexView
#
# Utility method
#
def setup_app(app):
"""
Register all administration views with the Flask application.

Creates the Flask-Admin instance, binds it to ``app`` and then calls
``register_admin(app, admin)`` on every auto-discovered admin module.
A failing module is logged and skipped, never fatal.
"""
from invenio.base.utils import autodiscover_admin_views
# Initialize app
admin = Admin(
name="Invenio",
index_view=AdminIndexView(),
base_template="admin_base.html"
)
admin.init_app(app)
# Call register() in admin module to register views.
modules = autodiscover_admin_views(app)
for m in modules:
register_func = getattr(m, 'register_admin', None)
if register_func and callable(register_func):
try:
register_func(app, admin)
except Exception:
# Best-effort: record the failure and continue with other modules.
- from invenio.errorlib import register_exception
+ from invenio.ext.logging import register_exception
register_exception()
diff --git a/invenio/ext/email/__init__.py b/invenio/ext/email/__init__.py
index 9689da742..a3b5aff56 100644
--- a/invenio/ext/email/__init__.py
+++ b/invenio/ext/email/__init__.py
@@ -1,430 +1,430 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Invenio mail sending utilities. send_email() is the main API function
people should be using; just check out its docstring.
"""
__revision__ = "$Id$"
from time import sleep
import re
import os
import sys
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email import Encoders
from email.MIMEImage import MIMEImage
from email.Utils import formatdate
from cStringIO import StringIO
from flask import g
from formatter import DumbWriter, AbstractFormatter
from flask.ext.email.message import EmailMultiAlternatives, EmailMessage
from invenio.base.globals import cfg
default_ln = lambda ln: cfg.get('CFG_SITE_LANG') if ln is None else ln
from .errors import EmailError
from invenio.ext.template import render_template_to_string
from invenio.base.helpers import unicodifier
def setup_app(app):
"""
Prepare application config from Invenio configuration.

Maps the legacy CFG_MISCUTIL_SMTP_* / CFG_EMAIL_* settings onto the
Flask-Email configuration keys, using ``setdefault`` so explicitly set
Flask-Email values win over the legacy ones.

@see: https://flask-email.readthedocs.org/en/latest/#configuration
"""
# Intentionally shadows the module-level ``cfg`` proxy: here we read the
# raw application config the legacy values were loaded into.
cfg = app.config
app.config.setdefault('EMAIL_BACKEND', cfg.get(
'CFG_EMAIL_BACKEND', 'flask.ext.email.backends.smtp.Mail'))
app.config.setdefault('DEFAULT_FROM_EMAIL', cfg['CFG_SITE_SUPPORT_EMAIL'])
app.config.setdefault('SERVER_EMAIL', cfg['CFG_SITE_ADMIN_EMAIL'])
app.config.setdefault('ADMINS', (cfg['CFG_SITE_ADMIN_EMAIL'], ))
app.config.setdefault('MANAGERS', (cfg['CFG_SITE_SUPPORT_EMAIL'], ))
CFG_MISCUTIL_SMTP_HOST = cfg.get('CFG_MISCUTIL_SMTP_HOST')
CFG_MISCUTIL_SMTP_PORT = cfg.get('CFG_MISCUTIL_SMTP_PORT')
CFG_MISCUTIL_SMTP_USER = cfg.get('CFG_MISCUTIL_SMTP_USER', '')
CFG_MISCUTIL_SMTP_PASS = cfg.get('CFG_MISCUTIL_SMTP_PASS', '')
CFG_MISCUTIL_SMTP_TLS = cfg.get('CFG_MISCUTIL_SMTP_TLS', False)
app.config.setdefault('EMAIL_HOST', CFG_MISCUTIL_SMTP_HOST)
app.config.setdefault('EMAIL_PORT', CFG_MISCUTIL_SMTP_PORT)
app.config.setdefault('EMAIL_HOST_USER', CFG_MISCUTIL_SMTP_USER)
app.config.setdefault('EMAIL_HOST_PASSWORD', CFG_MISCUTIL_SMTP_PASS)
app.config.setdefault('EMAIL_USE_TLS', CFG_MISCUTIL_SMTP_TLS)
# app.config['EMAIL_USE_SSL']: defaults to False
# File backend target directory (used when EMAIL_BACKEND writes to disk).
app.config.setdefault('EMAIL_FILE_PATH', cfg['CFG_LOGDIR'])
return app
def scheduled_send_email(fromaddr,
toaddr,
subject="",
content="",
header=None,
footer=None,
copy_to_admin=0,
attempt_times=1,
attempt_sleeptime=10,
user=None,
other_bibtasklet_arguments=None,
replytoaddr=""):
"""
Like send_email, but send an email via the bibsched
infrastructure.
@param fromaddr: sender
@type fromaddr: string
@param toaddr: list of receivers
@type toaddr: string (comma separated) or list of strings
@param subject: the subject
@param content: the body of the message
@param header: optional header, otherwise default is used
@param footer: optional footer, otherwise default is used
@param copy_to_admin: set to 1 in order to send email the admins
@param attempt_times: try at least n times before giving up sending
@param attempt_sleeptime: number of seconds to sleep between two attempts
@param user: the user name to user when scheduling the bibtasklet. If
None, the sender will be used
@param other_bibtasklet_arguments: other arguments to append to the list
of arguments to the call of task_low_level_submission
@param replytoaddr: [string or list-of-strings] to be used for the
reply-to header of the email (if string, then
receivers are separated by ',')
@return: the scheduled bibtasklet
"""
from invenio.bibtask import task_low_level_submission
# Bibtasklet arguments are plain strings: flatten address lists to
# comma-separated form before submission.
if not isinstance(toaddr, (unicode, str)):
toaddr = ','.join(toaddr)
if not isinstance(replytoaddr, (unicode, str)):
replytoaddr = ','.join(replytoaddr)
toaddr = remove_temporary_emails(toaddr)
if user is None:
user = fromaddr
# Copy the caller's extra arguments so they are never mutated in place.
if other_bibtasklet_arguments is None:
other_bibtasklet_arguments = []
else:
other_bibtasklet_arguments = list(other_bibtasklet_arguments)
if not header is None:
other_bibtasklet_arguments.extend(("-a", "header=%s" % header))
if not footer is None:
other_bibtasklet_arguments.extend(("-a", "footer=%s" % footer))
return task_low_level_submission(
"bibtasklet", user, "-T", "bst_send_email",
"-a", "fromaddr=%s" % fromaddr,
"-a", "toaddr=%s" % toaddr,
"-a", "replytoaddr=%s" % replytoaddr,
"-a", "subject=%s" % subject,
"-a", "content=%s" % content,
"-a", "copy_to_admin=%s" % copy_to_admin,
"-a", "attempt_times=%s" % attempt_times,
"-a", "attempt_sleeptime=%s" % attempt_sleeptime,
*other_bibtasklet_arguments)
def send_email(fromaddr,
               toaddr,
               subject="",
               content="",
               html_content='',
               html_images=None,
               header=None,
               footer=None,
               html_header=None,
               html_footer=None,
               copy_to_admin=0,
               attempt_times=1,
               attempt_sleeptime=10,
               debug_level=0,
               ln=None,
               charset=None,
               replytoaddr="",
               attachments=None
               ):
    """Send a forged email to TOADDR from FROMADDR with message created from subject, content and possibly
    header and footer.

    @param fromaddr: [string] sender
    @param toaddr: [string or list-of-strings] list of receivers (if string, then
        receivers are separated by ',')
    @param subject: [string] subject of the email
    @param content: [string] content of the email
    @param html_content: [string] html version of the email
    @param html_images: [dict] dictionary of image id, image path
    @param header: [string] header to add, None for the Default
    @param footer: [string] footer to add, None for the Default
    @param html_header: [string] header to add to the html part, None for the Default
    @param html_footer: [string] footer to add to the html part, None for the Default
    @param copy_to_admin: [int] if 1 add CFG_SITE_ADMIN_EMAIL in receivers
    @param attempt_times: [int] number of tries
    @param attempt_sleeptime: [int] seconds in between tries
    @param debug_level: [int] debug level
    @param ln: [string] invenio language
    @param charset: [string] the content charset. By default is None which means
        to try to encode the email as ascii, then latin1 then utf-8.
    @param replytoaddr: [string or list-of-strings] to be used for the
        reply-to header of the email (if string, then
        receivers are separated by ',')
    @param attachments: list of paths of files to be attached. Alternatively,
        every element of the list could be a tuple: (filename, mimetype)

    If sending fails, try to send it ATTEMPT_TIMES, and wait for
    ATTEMPT_SLEEPTIME seconds in between tries.

    @return: [bool]: True if email was sent okay, False if it was not.
    """
    from invenio.ext.logging import register_exception
    ln = default_ln(ln)
    if html_images is None:
        html_images = {}
    if type(toaddr) is not list:
        toaddr = toaddr.strip().split(',')
    # BUG FIX: remove_temporary_emails() returns a comma-separated *string*;
    # the original kept that string, so the toaddr.append() below raised
    # AttributeError whenever copy_to_admin was set.  Split back into a
    # list immediately and keep working with lists (forge_email accepts
    # either form).
    toaddr = remove_temporary_emails(toaddr).split(',')
    # More than one address, let's use Bcc in place of To
    usebcc = len(toaddr) > 1
    if copy_to_admin:
        if cfg['CFG_SITE_ADMIN_EMAIL'] not in toaddr:
            toaddr.append(cfg['CFG_SITE_ADMIN_EMAIL'])
    body = forge_email(fromaddr, toaddr, subject, content, html_content,
                       html_images, usebcc, header, footer, html_header,
                       html_footer, ln, charset, replytoaddr, attachments)
    if attempt_times < 1 or not toaddr:
        # Nothing to do: record the anomaly for the admins and bail out.
        try:
            raise EmailError(g._('The system is not attempting to send an email from %s, to %s, with body %s.') % (fromaddr, toaddr, body))
        except EmailError:
            register_exception()
        return False
    sent = False
    while not sent and attempt_times > 0:
        try:
            sent = body.send()
        except Exception:
            register_exception()
            if debug_level > 1:
                try:
                    raise EmailError(g._('Error in sending message. Waiting %s seconds. Exception is %s, while sending email from %s to %s with body %s.') % (attempt_sleeptime, sys.exc_info()[0], fromaddr, toaddr, body))
                except EmailError:
                    register_exception()
        if not sent:
            attempt_times -= 1
            if attempt_times > 0:  # sleep only if we shall retry again
                sleep(attempt_sleeptime)
    if not sent:
        try:
            raise EmailError(g._('Error in sending email from %s to %s with body %s.') % (fromaddr, toaddr, body))
        except EmailError:
            register_exception()
    return sent
def attach_embed_image(email, image_id, image_path):
    """
    Attach the image stored at *image_path* to *email*, addressable from
    HTML parts via ``cid:<image_id>``.
    """
    with open(image_path, 'rb') as image_file:
        payload = image_file.read()
    image_part = MIMEImage(payload)
    image_part.add_header('Content-ID', '<%s>' % image_id)
    filename = os.path.split(image_path)[1]
    image_part.add_header('Content-Disposition', 'attachment',
                          filename=filename)
    email.attach(image_part)
def forge_email(fromaddr, toaddr, subject, content, html_content='',
html_images=None, usebcc=False, header=None, footer=None,
html_header=None, html_footer=None, ln=None,
charset=None, replytoaddr="", attachments=None):
"""Prepare email. Add header and footer if needed.
@param fromaddr: [string] sender
@param toaddr: [string or list-of-strings] list of receivers (if string, then
receivers are separated by ',')
@param usebcc: [bool] True for using Bcc in place of To
@param subject: [string] subject of the email
@param content: [string] content of the email
@param html_content: [string] html version of the email
@param html_images: [dict] dictionary of image id, image path
@param header: [string] None for the default header
@param footer: [string] None for the default footer
@param ln: language
@charset: [string] the content charset. By default is None which means
to try to encode the email as ascii, then latin1 then utf-8.
@param replytoaddr: [string or list-of-strings] to be used for the
reply-to header of the email (if string, then
receivers are separated by ',')
@param attachments: list of paths of files to be attached. Alternatively,
every element of the list could be a tuple: (filename, mimetype)
@return: forged email as a string"""
ln = default_ln(ln)
if html_images is None:
html_images = {}
# Render the plain-text body through the site template (adds the default
# header/footer when None was passed).
content = render_template_to_string('mail_text.tpl',
content=unicodifier(content),
header=unicodifier(header),
footer=unicodifier(footer)
).encode('utf8')
# Normalize address lists to comma-separated strings.
if type(toaddr) is list:
toaddr = ','.join(toaddr)
if type(replytoaddr) is list:
replytoaddr = ','.join(replytoaddr)
toaddr = remove_temporary_emails(toaddr)
headers = {}
kwargs = {'to': [], 'cc': [], 'bcc': []}
if replytoaddr:
headers['Reply-To'] = replytoaddr
if usebcc:
# Hide the recipient list: real addresses go to Bcc, the visible To
# header carries a placeholder only.
headers['Bcc'] = toaddr
kwargs['bcc'] = toaddr.split(',')
kwargs['to'] = ['Undisclosed.Recipients:']
else:
kwargs['to'] = toaddr.split(',')
headers['From'] = fromaddr
headers['Date'] = formatdate(localtime=True)
headers['User-Agent'] = 'Invenio %s at %s' % (cfg['CFG_VERSION'],
cfg['CFG_SITE_URL'])
if html_content:
# Multipart/alternative message: plain text plus rendered HTML part.
html_content = render_template_to_string(
'mail_html.tpl',
content=unicodifier(html_content),
header=unicodifier(html_header),
footer=unicodifier(html_footer)
).encode('utf8')
msg_root = EmailMultiAlternatives(subject=subject, body=content,
from_email=fromaddr,
headers=headers, **kwargs)
msg_root.attach_alternative(html_content, "text/html")
#if not html_images:
# # No image? Attach the HTML to the root
# msg_root.attach(msg_text)
#else:
if html_images:
# Image(s)? Attach the HTML and image(s) as children of a
# "related" block
msg_related = MIMEMultipart('related')
#msg_related.attach(msg_text)
for image_id, image_path in html_images.iteritems():
attach_embed_image(msg_related, image_id, image_path)
msg_root.attach(msg_related)
else:
# Plain-text-only message.
msg_root = EmailMessage(subject=subject, body=content,
from_email=fromaddr, headers=headers, **kwargs)
if attachments:
from invenio.bibdocfile import _mimes, guess_format_from_url
#old_msg_root = msg_root
#msg_root = MIMEMultipart()
#msg_root.attach(old_msg_root)
for attachment in attachments:
try:
mime = None
if type(attachment) in (list, tuple):
attachment, mime = attachment
if mime is None:
## Automatic guessing of mimetype
mime = _mimes.guess_type(attachment)[0]
if mime is None:
# Last resort: derive an extension from the path and retry;
# fall back to the generic binary type.
ext = guess_format_from_url(attachment)
mime = _mimes.guess_type("foo" + ext)[0]
if not mime:
mime = 'application/octet-stream'
part = MIMEBase(*mime.split('/', 1))
part.set_payload(open(attachment, 'rb').read())
Encoders.encode_base64(part)
part.add_header('Content-Disposition', 'attachment; filename="%s"' % os.path.basename(attachment))
msg_root.attach(part)
except:
# Best-effort: a broken attachment is reported but the message
# is still forged without it.
- from invenio.errorlib import register_exception
+ from invenio.ext.logging import register_exception
register_exception(alert_admin=True, prefix="Can't attach %s" % attachment)
return msg_root
RE_NEWLINES = re.compile(r'<br\s*/?>|</p>', re.I)
RE_SPACES = re.compile(r'\s+')
RE_HTML_TAGS = re.compile(r'<.+?>')
def email_strip_html(html_content):
    """Strip html tags from html_content, trying to respect formatting."""
    # Collapse whitespace, turn <br>/<p> boundaries into newlines, then
    # drop every remaining tag.
    text = RE_SPACES.sub(' ', html_content)
    text = RE_NEWLINES.sub('\n', text)
    text = RE_HTML_TAGS.sub('', text)
    buf = StringIO()
    writer = AbstractFormatter(DumbWriter(buf))
    for line in text.split('\n'):
        writer.add_flowing_data(line)
        writer.end_paragraph(1)
    return buf.getvalue()
def remove_temporary_emails(emails):
    """
    Removes the temporary emails (which are constructed randomly when user logs in
    with an external authentication provider which doesn't supply an email
    address) from an email list.

    @param emails: email list (if string, then receivers are separated by ',')
    @type emails: str|[str]

    @rtype: str
    """
    from invenio.access_control_config import CFG_TEMP_EMAIL_ADDRESS
    if not isinstance(emails, (str, unicode)):
        emails = ','.join(emails)
    # Remove all of the spaces
    emails = emails.replace(' ', '')
    # Remove all of the emails formatted like CFG_TEMP_EMAIL_ADDRESS.
    # BUG FIX: the fourth positional argument of re.sub() is ``count``,
    # not ``flags``.  The original passed re.IGNORECASE positionally,
    # which silently capped the number of substitutions (re.IGNORECASE
    # == 2) and did NOT make the match case-insensitive.
    emails = re.sub((CFG_TEMP_EMAIL_ADDRESS % '\w+') + '(,|$)', '', emails,
                    flags=re.IGNORECASE)
    # Remove all consecutive commas
    emails = re.sub(',+', ',', emails)
    # Trim a leftover comma at either end; the truthiness guards also
    # prevent an IndexError when every address was removed.
    if emails and emails[0] == ',':
        # Remove the comma at the beginning of the string
        emails = emails[1:]
    if emails and emails[-1] == ',':
        # Remove the comma at the end of the string
        emails = emails[:-1]
    return emails
diff --git a/invenio/ext/legacy/__init__.py b/invenio/ext/legacy/__init__.py
index c423a9ef8..8f747cc7a 100644
--- a/invenio/ext/legacy/__init__.py
+++ b/invenio/ext/legacy/__init__.py
@@ -1,138 +1,138 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import warnings
## Import the remote debugger as a first thing, if allowed
#FIXME enable remote_debugger when invenio.config is ready
#try:
# from invenio import remote_debugger
#except:
# remote_debugger = None
from werkzeug.exceptions import HTTPException, NotFound
from werkzeug.wrappers import BaseResponse
from flask import request, g, current_app, render_template, abort, \
safe_join, send_from_directory
from .request_class import LegacyRequest
def setup_app(app):
    """
    Wire legacy (mod_python era) Invenio support into a Flask ``app``.

    Installs a WSGI middleware that reports uncaught exceptions to the
    administrator, sets the legacy request class, registers a 404
    handler that falls back to the legacy WSGI application, and adds a
    static file handler that also supports the legacy publisher.

    NOTE(review): this span contained unresolved diff markers and had
    lost all indentation; it was reconstructed to the post-patch state.

    :param app: the Flask application to configure.
    :return: the same ``app``, modified in place.
    """
    ## Legacy config support
    USE_X_SENDFILE = app.config.get('CFG_BIBDOCFILE_USE_XSENDFILE')
    DEBUG = app.config.get('CFG_DEVEL_SITE', 0) > 0
    app.config.setdefault('USE_X_SENDFILE', USE_X_SENDFILE)
    app.config.setdefault('DEBUG', DEBUG)
    app.debug = DEBUG

    class LegacyAppMiddleware(object):
        """Run every request in a request context and log crashes."""

        def __init__(self, app):
            self.app = app

        def __call__(self, environ, start_response):
            #FIXME
            #if remote_debugger:
            #    remote_debugger.start()
            with self.app.request_context(environ):
                g.start_response = start_response
                try:
                    response = self.app.full_dispatch_request()
                except Exception as e:
                    # Alert the admin with a full traceback, then let
                    # Flask render its standard error response.
                    from invenio.ext.logging import register_exception
                    register_exception(req=request, alert_admin=True)
                    response = self.app.handle_exception(e)
                return response(environ, start_response)

    ## Set custom request class.
    app.request_class = LegacyRequest
    app.wsgi_app = LegacyAppMiddleware(app)

    @app.errorhandler(404)
    def page_not_found(error):
        """Give the legacy WSGI application a chance to serve the URL."""
        try:
            from invenio.webinterface_handler_wsgi import \
                application as legacy_application
            response = legacy_application(request.environ, g.start_response)
            if not isinstance(response, BaseResponse):
                response = current_app.make_response(str(response))
            return response
        except HTTPException:
            return render_template("404.html"), 404

    @app.endpoint('static')
    @app.route(app.static_url_path + '/<path:filename>',
               methods=['POST', 'PUT'])
    def static_handler_with_legacy_publisher(*args, **kwargs):
        """
        Adds support for legacy publisher.

        NOTE: It changes order of url page lookup. First, the invenio_handler
        will be called and on 404 error the mp_legacy_publisher is called.
        """
        from invenio.webinterface_handler_wsgi import \
            is_mp_legacy_publisher_path, mp_legacy_publisher, \
            application as legacy_application
        possible_module, possible_handler = is_mp_legacy_publisher_path(
            request.environ['PATH_INFO'])
        if possible_module is not None:
            legacy_publisher = lambda req: \
                mp_legacy_publisher(req, possible_module, possible_handler)
            return legacy_application(request.environ, g.start_response,
                                      handler=legacy_publisher)

        # Static file serving for devserver
        # ---------------------------------
        # Apache normally serves all static files, but if we are using the
        # devserver we need to serve static files here.  Werkzeug's default
        # behaviour is to return a '405 Method not allowed' for POST requests
        # to static files.  However, if we abort all POST requests with 405,
        # the legacy_application (see page_not_found()) will not be given a
        # chance to serve static files, as it only gets invoked when we
        # abort with a 404.  Hence, on POST requests, we first check if the
        # static file exists, and if it does we abort the request with a 405.
        if not app.config.get('CFG_FLASK_SERVE_STATIC_FILES'):
            abort(404)
        else:
            try:
                static_file_response = app.send_static_file(*args, **kwargs)
            except NotFound:
                static_file_response = send_from_directory(
                    safe_join(app.instance_path, 'static'),
                    kwargs['filename'])
            if request.method in ['POST', 'PUT']:
                abort(405)
            else:
                return static_file_response

    try:
        # pylint: disable=E0611
        from invenio.webinterface_handler_local import customize_app
        # pylint: enable=E0611
        warnings.warn("Do not use 'invenio.webinterface_handler_local:"
                      "customize_app' directly. Please, adapt your function "
                      "into package and use configuration option "
                      "EXTENSIONS = ['mypackage.customize_app'] instead.",
                      DeprecationWarning)
    except ImportError:
        ## No customization needed.
        pass

    return app
diff --git a/invenio/ext/legacy/handler.py b/invenio/ext/legacy/handler.py
index 5cddaf9a5..5f2d19d1e 100644
--- a/invenio/ext/legacy/handler.py
+++ b/invenio/ext/legacy/handler.py
@@ -1,450 +1,450 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Apache request handler mechanism.
It gives the tools to map url to functions, handles the legacy url
scheme (/search.py queries), HTTP/HTTPS switching, language
specification,...
"""
__revision__ = "$Id$"
## Import the remote debugger as a first thing, if allowed
try:
import invenio.remote_debugger as remote_debugger
except:
remote_debugger = None
import urlparse
import cgi
import sys
import re
import os
import gc
from flask import session
from invenio.utils import apache
from invenio.config import CFG_SITE_URL, CFG_SITE_SECURE_URL, \
CFG_SITE_RECORD, CFG_ACCESS_CONTROL_LEVEL_SITE
from invenio.base.i18n import wash_language
from invenio.utils.url import redirect_to_url
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webuser import get_preferred_user_language, isGuestUser, \
getUid, isUserSuperAdmin, collect_user_info, setUid
from invenio.webinterface_handler_wsgi_utils import StringField
from invenio.modules import apikeys as web_api_key
## The following variable is True if the installation make any difference
## between HTTP Vs. HTTPS connections.
CFG_HAS_HTTPS_SUPPORT = CFG_SITE_SECURE_URL.startswith("https://")
## The following variable is True if HTTPS is used for *any* URL.
CFG_FULL_HTTPS = CFG_SITE_URL.lower().startswith("https://")
## Set this to True in order to log some more information.
DEBUG = False
# List of URIs for which the 'ln' argument must not be added
# automatically
CFG_NO_LANG_RECOGNITION_URIS = ['/rss',
'/oai2d',
'/journal']
RE_SLASHES = re.compile('/+')
RE_SPECIAL_URI = re.compile('^/%s/\d+|^/collection/.+' % CFG_SITE_RECORD)
_RE_BAD_MSIE = re.compile("MSIE\s+(\d+\.\d+)")
def _debug(req, msg):
    """
    Write ``msg`` to the Apache error log, but only when the module
    level ``DEBUG`` flag is switched on.

    @param req: the request.
    @param msg: the message.
    @type msg: string
    """
    if not DEBUG:
        return
    req.log_error(msg)
def _check_result(req, result):
"""
Check that a page handler actually wrote something, and
properly finish the apache request.
@param req: the request.
@param result: the produced output.
@type result: string
@return: an apache error code
@rtype: int
@raise apache.SERVER_RETURN: in case of a HEAD request.
@note: that this function actually takes care of writing the result
to the client.
"""
if result or req.bytes_sent > 0:
if result is None:
result = ""
else:
result = str(result)
# unless content_type was manually set, we will attempt
# to guess it
if not req.content_type_set_p:
# make an attempt to guess content-type
if result[:100].strip()[:6].lower() == '<html>' \
or result.find('</') > 0:
req.content_type = 'text/html'
else:
req.content_type = 'text/plain'
if req.method == 'HEAD':
if req.status in (apache.HTTP_NOT_FOUND, ):
raise apache.SERVER_RETURN, req.status
else:
req.write(result)
return apache.OK
else:
req.log_error("publisher: %s returned nothing." % `object`)
return apache.HTTP_INTERNAL_SERVER_ERROR
class TraversalError(Exception):
    """
    Raised when the URL of a request cannot be mapped onto a page
    handler during path traversal.
    """
class WebInterfaceDirectory(object):
"""
A directory groups web pages, and can delegate dispatching of
requests to the actual handler. This has been heavily borrowed
from Quixote's dispatching mechanism, with specific adaptations.
"""
# Lists the valid URLs contained in this directory.
_exports = []
# Set this to True in order to redirect queries over HTTPS
_force_https = False
def _translate(self, component):
"""(component : string) -> string | None
Translate a path component into a Python identifier. Returning
None signifies that the component does not exist.
"""
if component in self._exports:
if component == '':
return 'index' # implicit mapping
else:
return component
else:
# check for an explicit external to internal mapping
# (an _exports entry may be an (external, internal) name tuple)
for value in self._exports:
if isinstance(value, tuple):
if value[0] == component:
return value[1]
else:
return None
def _lookup(self, component, path):
""" Override this method if you need to map dynamic URLs.
It can eat up as much of the remaining path as needed, and
return the remaining parts, so that the traversal can
continue.
"""
return None, path
def _traverse(self, req, path, do_head=False, guest_p=True):
""" Locate the handler of an URI by traversing the elements of
the path."""
_debug(req, 'traversing %r' % path)
component, path = path[0], path[1:]
name = self._translate(component)
if name is None:
obj, path = self._lookup(component, path)
else:
obj = getattr(self, name)
if obj is None:
_debug(req, 'could not resolve %s' % repr((component, path)))
raise TraversalError()
# We have found the next segment. If we know that from this
# point our subpages are over HTTPS, do the switch.
if (CFG_FULL_HTTPS or CFG_HAS_HTTPS_SUPPORT and (self._force_https or session.need_https())) and not req.is_https():
# We need to isolate the part of the URI that is after
# CFG_SITE_URL, and append that to our CFG_SITE_SECURE_URL.
original_parts = urlparse.urlparse(req.unparsed_uri)
plain_prefix_parts = urlparse.urlparse(CFG_SITE_URL)
secure_prefix_parts = urlparse.urlparse(CFG_SITE_SECURE_URL)
# Compute the new path
plain_path = original_parts[2]
plain_path = secure_prefix_parts[2] + \
plain_path[len(plain_prefix_parts[2]):]
# ...and recompose the complete URL
final_parts = list(secure_prefix_parts)
final_parts[2] = plain_path
# Keep params, query and fragment from the original URI.
final_parts[-3:] = original_parts[-3:]
target = urlparse.urlunparse(final_parts)
## The following condition used to allow certain URLs to
## by-pass the forced SSL redirect. Since SSL certificates
## are deployed on INSPIRE, this is no longer needed
## Will be left here for reference.
#from invenio.config import CFG_INSPIRE_SITE
#if not CFG_INSPIRE_SITE or plain_path.startswith('/youraccount/login'):
redirect_to_url(req, target)
# Continue the traversal. If there is a path, continue
# resolving, otherwise call the method as it is our final
# renderer. We even pass it the parsed form arguments.
if path:
if hasattr(obj, '_traverse'):
return obj._traverse(req, path, do_head, guest_p)
else:
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
if do_head:
# HEAD request: send the headers only and stop before
# producing any body.
req.content_type = "text/html; charset=UTF-8"
raise apache.SERVER_RETURN, apache.DONE
form = req.form
#if 'ln' not in form and \
# req.uri not in CFG_NO_LANG_RECOGNITION_URIS:
# ln = get_preferred_user_language(req)
# form.add_field('ln', ln)
result = _check_result(req, obj(req, form))
return result
def __call__(self, req, form):
""" Maybe resolve the final / of a directory """
# When this method is called, we either are a directory which
# has an 'index' method, and we redirect to it, or we don't
# have such a method, in which case it is a traversal error.
if "" in self._exports:
if not form:
# Fix missing trailing slash as a convenience, unless
# we are processing a form (in which case it is better
# to fix the form posting).
redirect_to_url(req, req.uri + "/", apache.HTTP_MOVED_PERMANENTLY)
_debug(req, 'directory %r is not callable' % self)
raise TraversalError()
def create_handler(root):
""" Return a handler function that will dispatch apache requests
through the URL layout passed in parameter."""
def _handler(req):
""" This handler is invoked by mod_python with the apache request."""
allowed_methods = ("GET", "POST", "HEAD", "OPTIONS", "PUT")
#req.allow_methods(allowed_methods, 1)
#if req.method not in allowed_methods:
# raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED
if req.method == 'OPTIONS':
## OPTIONS is used to know which methods are allowed
req.headers_out['Allow'] = ', '.join(allowed_methods)
raise apache.SERVER_RETURN, apache.OK
# Set user agent for fckeditor.py, which needs it here
os.environ["HTTP_USER_AGENT"] = req.headers_in.get('User-Agent', '')
# Check if REST authentication can be performed
# (API keys are only honoured over HTTPS).
if req.args:
args = cgi.parse_qs(req.args)
if 'apikey' in args and req.is_https():
uid = web_api_key.acc_get_uid_from_request(req.uri, req.args)
if uid < 0:
raise apache.SERVER_RETURN, apache.HTTP_UNAUTHORIZED
else:
setUid(req=req, uid=uid)
guest_p = isGuestUser(getUid(req), run_on_slave=False)
uri = req.uri
if uri == '/':
path = ['']
else:
## Let's collapse multiple slashes into a single /
uri = RE_SLASHES.sub('/', uri)
path = uri[1:].split('/')
if CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
## If the site is under maintenance mode let's return
## 503 to casual crawler to avoid having the site being
## indexed
req.status = 503
g = _RE_BAD_MSIE.search(req.headers_in.get('User-Agent', "MSIE 6.0"))
bad_msie = g and float(g.group(1)) < 9.0
if uri.startswith('/yours') or not guest_p:
## Private/personalized request should not be cached
## NOTE(review): MSIE < 9 over HTTPS apparently needs the weaker
## 'private' caching header instead of 'no-store' -- confirm.
if bad_msie and req.is_https():
req.headers_out['Cache-Control'] = 'private, max-age=0, must-revalidate'
else:
req.headers_out['Cache-Control'] = 'private, no-cache, no-store, max-age=0, must-revalidate'
req.headers_out['Pragma'] = 'no-cache'
req.headers_out['Vary'] = '*'
elif not (bad_msie and req.is_https()):
req.headers_out['Cache-Control'] = 'public, max-age=3600'
req.headers_out['Vary'] = 'Cookie, ETag, Cache-Control'
try:
if req.header_only and not RE_SPECIAL_URI.match(req.uri):
return root._traverse(req, path, True, guest_p)
else:
## bibdocfile have a special treatment for HEAD
return root._traverse(req, path, False, guest_p)
except TraversalError:
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
except apache.SERVER_RETURN:
## This is one of mod_python way of communicating
raise
except IOError, exc:
if 'Write failed, client closed connection' not in "%s" % exc:
## Workaround for considering as false positive exceptions
## raised by mod_python when the user closes the connection
## or in some other rare and not well identified cases.
register_exception(req=req, alert_admin=True)
raise
except Exception:
# send the error message, much more convenient than log hunting
if remote_debugger:
args = {}
if req.args:
args = cgi.parse_qs(req.args)
if 'debug' in args:
remote_debugger.error_msg(args['debug'])
register_exception(req=req, alert_admin=True)
raise
# Serve an error by default.
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
return _handler
def wash_urlargd(form, content):
    """
    Wash the complete form based on the specification in content.

    ``content`` maps each field name to a ``(type, default)`` tuple,
    where type can be list, str,
    invenio.webinterface_handler_wsgi_utils.StringField, int, tuple, or
    invenio.webinterface_handler_wsgi_utils.Field (for file uploads).

    The 'ln' field is always included.  Fields of ``form`` that are not
    declared in ``content`` are discarded.  When {list,tuple} are asked
    for, a {list,tuple} of strings is returned, so beware when using
    this for multiple file upload forms.

    @Return: argd dictionary that can be used for passing function
    parameters by keywords.
    """
    # The washed 'ln' is always part of the result.
    content['ln'] = (str, '')
    washed = {}
    for field, (dst_type, default) in content.items():
        try:
            value = form[field]
        except KeyError:
            washed[field] = default
            continue
        # FIXES problems with unicode arguments from Flask.
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        src_type = type(value)
        # Collection-valued sources: either keep all elements (as plain
        # strings, never Field instances) or fall through with only the
        # first one.
        if src_type in (list, tuple):
            if dst_type is list:
                washed[field] = [str(item) for item in value]
                continue
            if dst_type is tuple:
                washed[field] = tuple([str(item) for item in value])
                continue
            value = value[0]
        # Already the expected type?  Keep it (StringFields are
        # flattened to plain strings).
        if isinstance(value, dst_type):
            if isinstance(value, StringField):
                washed[field] = str(value)
            else:
                washed[field] = value
            continue
        # From here on 'value' is a single scalar: coerce it.
        if dst_type in (str, int):
            try:
                washed[field] = dst_type(value)
            except:
                washed[field] = default
        elif dst_type is tuple:
            washed[field] = (str(value), )
        elif dst_type is list:
            washed[field] = [str(value)]
        else:
            raise ValueError('cannot cast form value %s of type %r into type %r' % (value, src_type, dst_type))
    washed['ln'] = wash_language(washed['ln'])
    return washed
diff --git a/invenio/ext/legacy/layout.py b/invenio/ext/legacy/layout.py
index 95f9500ec..917fdd146 100644
--- a/invenio/ext/legacy/layout.py
+++ b/invenio/ext/legacy/layout.py
@@ -1,369 +1,369 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Global organisation of the application's URLs.
This module binds together Invenio's modules and maps them to
their corresponding URLs (ie, /search to the websearch modules,...)
"""
from invenio.webinterface_handler import create_handler
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webinterface_handler import WebInterfaceDirectory
from invenio.utils import apache
from invenio.config import CFG_DEVEL_SITE, CFG_OPENAIRE_SITE
class WebInterfaceDumbPages(WebInterfaceDirectory):
"""This class implements a dumb interface to use as a fallback in case of
errors importing particular module pages."""
# Export the root ('') so the fallback answers the directory URL itself.
_exports = ['']
def __call__(self, req, form):
# Render a generic "temporary failure" page with a 500 status.
try:
from invenio.webpage import page
except ImportError:
# Even invenio.webpage may be broken: fall back to returning
# the raw message body.
page = lambda * args: args[1]
req.status = apache.HTTP_INTERNAL_SERVER_ERROR
msg = "<p>This functionality is experiencing a temporary failure.</p>"
msg += "<p>The administrator has been informed about the problem.</p>"
try:
from invenio.config import CFG_SITE_ADMIN_EMAIL
msg += """<p>You can contact <code>%s</code>
in case of questions.</p>""" % \
CFG_SITE_ADMIN_EMAIL
except ImportError:
pass
msg += """<p>We hope to restore the service soon.</p>
<p>Sorry for the inconvenience.</p>"""
try:
return page('Service failure', msg)
except:
return msg
def _lookup(self, component, path):
# Any sub-path resolves to this same dumb page as well.
return WebInterfaceDumbPages(), path
index = __call__
try:
from invenio.websearch_webinterface import WebInterfaceSearchInterfacePages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceSearchInterfacePages = WebInterfaceDumbPages
try:
from invenio.websearch_webinterface import WebInterfaceRSSFeedServicePages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceRSSFeedServicePages = WebInterfaceDumbPages
try:
from invenio.websearch_webinterface import WebInterfaceUnAPIPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceUnAPIPages = WebInterfaceDumbPages
try:
from invenio.bibdocfile_webinterface import bibdocfile_legacy_getfile
except:
register_exception(alert_admin=True, subject='EMERGENCY')
bibdocfile_legacy_getfile = WebInterfaceDumbPages
try:
from invenio.websubmit_webinterface import WebInterfaceSubmitPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceSubmitPages = WebInterfaceDumbPages
try:
from invenio.websession_webinterface import WebInterfaceYourAccountPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourAccountPages = WebInterfaceDumbPages
try:
from invenio.websession_webinterface import WebInterfaceYourTicketsPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourTicketsPages = WebInterfaceDumbPages
try:
from invenio.websession_webinterface import WebInterfaceYourGroupsPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourGroupsPages = WebInterfaceDumbPages
try:
from invenio.webalert_webinterface import WebInterfaceYourAlertsPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourAlertsPages = WebInterfaceDumbPages
try:
from invenio.webbasket_webinterface import WebInterfaceYourBasketsPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourBasketsPages = WebInterfaceDumbPages
try:
from invenio.webcomment_webinterface import WebInterfaceCommentsPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceCommentsPages = WebInterfaceDumbPages
try:
from invenio.weblinkback_webinterface import WebInterfaceRecentLinkbacksPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceRecentLinkbacksPages = WebInterfaceDumbPages
try:
from invenio.webmessage_webinterface import WebInterfaceYourMessagesPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourMessagesPages = WebInterfaceDumbPages
try:
from invenio.errorlib_webinterface import WebInterfaceErrorPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceErrorPages = WebInterfaceDumbPages
try:
from invenio.oai_repository_webinterface import WebInterfaceOAIProviderPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceOAIProviderPages = WebInterfaceDumbPages
try:
from invenio.webstat_webinterface import WebInterfaceStatsPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceStatsPages = WebInterfaceDumbPages
try:
from invenio.bibcirculation_webinterface import WebInterfaceYourLoansPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceYourLoansPages = WebInterfaceDumbPages
try:
from invenio.bibcirculation_webinterface import WebInterfaceILLPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceILLPages = WebInterfaceDumbPages
try:
from invenio.webjournal_webinterface import WebInterfaceJournalPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceJournalPages = WebInterfaceDumbPages
try:
from invenio.webdoc_webinterface import WebInterfaceDocumentationPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceDocumentationPages = WebInterfaceDumbPages
try:
from invenio.bibexport_method_fieldexporter_webinterface import \
WebInterfaceFieldExporterPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceFieldExporterPages = WebInterfaceDumbPages
try:
from invenio.bibknowledge_webinterface import WebInterfaceBibKnowledgePages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceBibKnowledgePages = WebInterfaceDumbPages
try:
from invenio.batchuploader_webinterface import \
WebInterfaceBatchUploaderPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceBatchUploaderPages = WebInterfaceDumbPages
try:
from invenio.bibsword_webinterface import \
WebInterfaceSword
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceSword = WebInterfaceDumbPages
try:
    from invenio.ping_webinterface import \
        WebInterfacePingPages
except:
    # Consistency fix: subject was misspelled 'EMERGENCE' here while
    # every sibling import guard uses 'EMERGENCY'.
    register_exception(alert_admin=True, subject='EMERGENCY')
    WebInterfacePingPages = WebInterfaceDumbPages
try:
from invenio.bibauthorid_webinterface import WebInterfaceBibAuthorIDPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceBibAuthorIDPages = WebInterfaceDumbPages
try:
from invenio.bibcirculationadmin_webinterface import \
WebInterfaceBibCirculationAdminPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceBibCirculationAdminPages = WebInterfaceDumbPages
try:
from invenio.bibsched_webinterface import \
WebInterfaceBibSchedPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceBibSchedPages = WebInterfaceDumbPages
try:
from invenio.webauthorprofile_webinterface import WebAuthorPages
WebInterfaceWebAuthorPages = WebAuthorPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceWebAuthorPages = WebInterfaceDumbPages
try:
from invenio.docextract_webinterface import WebInterfaceDocExtract
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceDocExtract = WebInterfaceDumbPages
try:
    from invenio.webcomment_webinterface import WebInterfaceYourCommentsPages
except:
    register_exception(alert_admin=True, subject='EMERGENCY')
    # Bug fix: the fallback used to bind WebInterfaceYourAlertsPages,
    # which both clobbered the alerts fallback and left
    # WebInterfaceYourCommentsPages undefined on import failure.
    WebInterfaceYourCommentsPages = WebInterfaceDumbPages
try:
from invenio.goto_webinterface import WebInterfaceGotoPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceGotoPages = WebInterfaceDumbPages
if CFG_OPENAIRE_SITE:
try:
from invenio.openaire_deposit_webinterface import \
WebInterfaceOpenAIREDepositPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceOpenAIREDepositPages = WebInterfaceDumbPages
openaire_exports = ['deposit']
else:
openaire_exports = []
if CFG_DEVEL_SITE:
try:
from invenio.httptest_webinterface import WebInterfaceHTTPTestPages
except:
register_exception(alert_admin=True, subject='EMERGENCY')
WebInterfaceHTTPTestPages = WebInterfaceDumbPages
test_exports = ['httptest']
else:
test_exports = []
class WebInterfaceAdminPages(WebInterfaceDirectory):
    """This class implements /admin2 admin pages."""

    _exports = ['index', 'bibcirculation', 'bibsched']

    bibcirculation = WebInterfaceBibCirculationAdminPages()
    bibsched = WebInterfaceBibSchedPages()

    def index(self, req, form):
        """Landing page of the admin interface (not implemented yet)."""
        return "FIXME: return /help/admin content"
class WebInterfaceInvenio(WebInterfaceSearchInterfacePages):
""" The global URL layout is composed of the search API plus all
the other modules."""
# Every top-level URL component served by the site: the search pages
# plus one entry per module. Tuple entries map an external name to an
# internal attribute name (see WebInterfaceDirectory._translate).
_exports = WebInterfaceSearchInterfacePages._exports + \
[
'youraccount',
'youralerts',
'yourbaskets',
'yourmessages',
'yourloans',
'yourcomments',
'ill',
'yourgroups',
'yourtickets',
'comments',
'error',
'oai2d', ('oai2d.py', 'oai2d'),
('getfile.py', 'getfile'),
'submit',
'rss',
'stats',
'journal',
'help',
'unapi',
'exporter',
'kb',
'batchuploader',
'bibsword',
'ping',
'person',
'admin2',
'linkbacks',
'author',
'textmining',
'goto',
] + test_exports + openaire_exports
def __init__(self):
# Handlers that depend on run-time configuration are attached per
# instance; all others are plain class attributes below.
self.getfile = bibdocfile_legacy_getfile
if CFG_DEVEL_SITE:
self.httptest = WebInterfaceHTTPTestPages()
if CFG_OPENAIRE_SITE:
self.deposit = WebInterfaceOpenAIREDepositPages()
submit = WebInterfaceSubmitPages()
youraccount = WebInterfaceYourAccountPages()
youralerts = WebInterfaceYourAlertsPages()
yourbaskets = WebInterfaceYourBasketsPages()
yourmessages = WebInterfaceYourMessagesPages()
yourloans = WebInterfaceYourLoansPages()
ill = WebInterfaceILLPages()
yourgroups = WebInterfaceYourGroupsPages()
yourtickets = WebInterfaceYourTicketsPages()
comments = WebInterfaceCommentsPages()
error = WebInterfaceErrorPages()
oai2d = WebInterfaceOAIProviderPages()
rss = WebInterfaceRSSFeedServicePages()
stats = WebInterfaceStatsPages()
journal = WebInterfaceJournalPages()
help = WebInterfaceDocumentationPages()
unapi = WebInterfaceUnAPIPages()
exporter = WebInterfaceFieldExporterPages()
kb = WebInterfaceBibKnowledgePages()
admin2 = WebInterfaceAdminPages()
batchuploader = WebInterfaceBatchUploaderPages()
bibsword = WebInterfaceSword()
ping = WebInterfacePingPages()
person = WebInterfaceBibAuthorIDPages()
linkbacks = WebInterfaceRecentLinkbacksPages()
#redirects author to the new webauthor
author = WebInterfaceWebAuthorPages()
#author = WebInterfaceAuthorPages()
textmining = WebInterfaceDocExtract()
yourcomments = WebInterfaceYourCommentsPages()
goto = WebInterfaceGotoPages()
# This creates the 'handler' function, which will be invoked directly
# by mod_python.
# It dispatches over the whole URL tree rooted at WebInterfaceInvenio.
invenio_handler = create_handler(WebInterfaceInvenio())
diff --git a/invenio/ext/logging/__init__.py b/invenio/ext/logging/__init__.py
index c10cd5cbf..54425a9ed 100644
--- a/invenio/ext/logging/__init__.py
+++ b/invenio/ext/logging/__init__.py
@@ -1,47 +1,49 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
- invenio.ext.logger
- --------------------------------------
+ invenio.ext.logging
+ -------------------
This module adds and configures custom logger.
"""
import os
from logging.handlers import RotatingFileHandler
from logging import Formatter
+from .wrappers import *
+
def setup_app(app):
    """Adds new Rotating File Handler to application.

    The log file is written to
    ``<instance_path>/<CFG_LOGDIR>/<logger_name>.log``.

    NOTE(review): this span contained unresolved '+'/'-' diff marker
    lines; it was reconstructed to the post-patch state.
    """
    try:
        os.makedirs(os.path.join(app.instance_path,
                                 app.config.get('CFG_LOGDIR', '')))
    except Exception:
        # Directory most likely exists already; a truly unusable path
        # will surface with a clearer error when the handler opens it.
        pass
    file_log_name = os.path.join(app.instance_path,
                                 app.config.get('CFG_LOGDIR', ''),
                                 app.logger_name + '.log')
    handler = RotatingFileHandler(file_log_name)
    handler.setFormatter(Formatter(
        '%(asctime)s %(levelname)s: %(message)s '
        '[in %(pathname)s:%(lineno)d]'
    ))
    app.logger.addHandler(handler)
diff --git a/invenio/ext/logging/models.py b/invenio/ext/logging/models.py
new file mode 100644
index 000000000..3223328fa
--- /dev/null
+++ b/invenio/ext/logging/models.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+#
+## This file is part of Invenio.
+## Copyright (C) 2011, 2012 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+"""
+errorlib database models.
+"""
+
+# General imports.
+from datetime import datetime
+from invenio.base.globals import cfg
+from invenio.ext.sqlalchemy import db
+
+# Create your models here.
+
+
+def _is_pow_of_2(n):
+ """
+ Return True if n is a power of 2
+ """
+ while n > 1:
+ if n % 2:
+ return False
+ n = n / 2
+ return True
+
+
+class HstEXCEPTION(db.Model):
+ """Represents a HstEXCEPTION record."""
+ __tablename__ = 'hstEXCEPTION'
+ id = db.Column(db.Integer(15, unsigned=True), nullable=False,
+ primary_key=True, autoincrement=True)
+ name = db.Column(db.String(50), nullable=False)
+ filename = db.Column(db.String(255), nullable=True)
+ line = db.Column(db.Integer(9), nullable=True)
+ last_seen = db.Column(db.DateTime, nullable=False,
+ server_default='1900-01-01 00:00:00', index=True)
+ last_notified = db.Column(db.DateTime, nullable=False,
+ server_default='1900-01-01 00:00:00', index=True)
+ counter = db.Column(db.Integer(15), nullable=False,
+ server_default='0')
+ total = db.Column(db.Integer(15), nullable=False,
+ server_default='0', index=True)
+
+ __table_args__ = (db.Index('name', name, filename, line, unique=True),
+ db.Model.__table_args__)
+
+ @classmethod
+ def get_or_create(cls, name, filename, line):
+ """Finds or create exception log."""
+ try:
+ log = cls.query.filter_by(name=name, filename=filename,
+ line=line).one()
+ delta = datetime.now() - log.last_notified
+ reset_counter = (delta.seconds + delta.days * 86400) >= \
+ cfg['CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER']
+ counter = 1 if reset_counter else log.counter + 1
+ log.update({'last_notified': db.func.now(),
+ 'counter': counter,
+ 'total': log.total + 1}, synchronize_session=False)
+ db.session.merge(log)
+ except:
+ log = HstEXCEPTION(name=name,
+ filename=filename,
+ line=line,
+ last_seen=db.func.now(),
+ last_notified=db.func.now(),
+ counter=1,
+ total=1)
+ db.session.add(log)
+ db.session.commit()
+ return log
+
+ @property
+ def exception_should_be_notified(self):
+ return _is_pow_of_2(self.counter)
+
+ @property
+ def pretty_notification_info(self):
+ return ("This exception has already been seen %s times\n "
+ "last time it was seen: %s\n "
+ "last time it was notified: %s\n" % (
+ self.total,
+ self.last_seen.strftime("%Y-%m-%d %H:%M:%S"),
+ self.last_notified.strftime("%Y-%m-%d %H:%M:%S")))
+
+ @classmethod
+ def get_pretty_notification_info(cls, name, filename, line):
+ """
+ Return a sentence describing when this exception was already seen.
+ """
+ try:
+ return cls.query.filter_by(name=name, filename=filename, line=line).\
+ one().pretty_notification_info
+ except:
+ return "It is the first time this exception has been seen.\n"
+
+
+
+__all__ = ['HstEXCEPTION']
diff --git a/invenio/ext/logging/wrappers.py b/invenio/ext/logging/wrappers.py
index 257d09bba..a6c5dd7cd 100644
--- a/invenio/ext/logging/wrappers.py
+++ b/invenio/ext/logging/wrappers.py
@@ -1,577 +1,518 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" Error handling library """
__revision__ = "$Id$"
import traceback
import os
import sys
import time
import datetime
import re
import inspect
from cStringIO import StringIO
+from flask import current_app
-from invenio.utils.url import wash_url_argument
-from invenio.base.i18n import wash_language, gettext_set_language
from invenio.base.globals import cfg
-from invenio.utils.date import convert_datestruct_to_datetext
-from invenio.dbquery import run_sql
+from .models import HstEXCEPTION
## Regular expression to match possible password related variable that should
## be disclosed in frame analysis.
RE_PWD = re.compile(r"pwd|pass|p_pw", re.I)
-def get_client_info(req):
- """
- Returns a dictionary with client information
- @param req: mod_python request
- """
- try:
- return {
- 'host': req.hostname,
- 'url': req.unparsed_uri,
- 'time': convert_datestruct_to_datetext(time.localtime()),
- 'browser': 'User-Agent' in req.headers_in and \
- req.headers_in['User-Agent'] or "N/A",
- 'client_ip': req.remote_ip}
- except:
- return {}
-
-
def get_pretty_wide_client_info(req):
"""Return in a pretty way all the avilable information about the current
user/client"""
if req:
from invenio.webuser import collect_user_info
user_info = collect_user_info(req)
keys = user_info.keys()
keys.sort()
max_key = max([len(key) for key in keys])
ret = ""
fmt = "%% %is: %%s\n" % max_key
for key in keys:
if RE_PWD.search(key):
continue
if key in ('uri', 'referer'):
ret += fmt % (key, "<%s>" % user_info[key])
else:
ret += fmt % (key, user_info[key])
if ret.endswith('\n'):
return ret[:-1]
else:
return ret
else:
return "No client information available"
def get_tracestack():
"""
If an exception has been caught, return the system tracestack or else
return tracestack of what is currently in the stack
"""
if traceback.format_tb(sys.exc_info()[2]):
delimiter = "\n"
tracestack_pretty = "Traceback: \n%s" % \
delimiter.join(traceback.format_tb(sys.exc_info()[2]))
else:
## force traceback except for this call
tracestack = traceback.extract_stack()[:-1]
tracestack_pretty = "%sForced traceback (most recent call last)" % \
(' '*4, )
for trace_tuple in tracestack:
tracestack_pretty += """
File "%(file)s", line %(line)s, in %(function)s
%(text)s""" % {
'file': trace_tuple[0],
'line': trace_tuple[1],
'function': trace_tuple[2],
'text': trace_tuple[3] is not None and \
str(trace_tuple[3]) or ""}
return tracestack_pretty
+
def register_emergency(msg, recipients=None):
"""Launch an emergency. This means to send email messages to each
address in 'recipients'. By default recipients will be obtained via
get_emergency_recipients() which loads settings from
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES
"""
from invenio.ext.email import send_email
if not recipients:
recipients = get_emergency_recipients()
recipients = set(recipients)
recipients.add(cfg['CFG_SITE_ADMIN_EMAIL'])
for address_str in recipients:
send_email(cfg['CFG_SITE_SUPPORT_EMAIL'], address_str, "Emergency notification", msg)
+
def get_emergency_recipients(recipient_cfg=None):
"""Parse a list of appropriate emergency email recipients from
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES, or from a provided dictionary
comprised of 'time constraint' => 'comma separated list of addresses'
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES format example:
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {
'Sunday 22:00-06:00': '0041761111111@email2sms.foo.com',
'06:00-18:00': 'team-in-europe@foo.com,0041762222222@email2sms.foo.com',
'18:00-06:00': 'team-in-usa@foo.com',
'*': 'john.doe.phone@foo.com'}
"""
from invenio.utils.date import parse_runtime_limit
if recipient_cfg is None:
- recifient_cfg = cfg['CFG_SITE_EMERGENCY_EMAIL_ADDRESSES']
+ recipient_cfg = cfg['CFG_SITE_EMERGENCY_EMAIL_ADDRESSES']
recipients = set()
for time_condition, address_str in recipient_cfg.items():
if time_condition and time_condition is not '*':
(current_range, future_range) = parse_runtime_limit(time_condition)
if not current_range[0] <= datetime.datetime.now() <= current_range[1]:
continue
recipients.update([address_str])
return list(recipients)
+
def find_all_values_to_hide(local_variables, analyzed_stack=None):
"""Return all the potential password to hyde."""
## Let's add at least the DB password.
if analyzed_stack is None:
ret = set([cfg['CFG_DATABASE_PASS']])
analyzed_stack = set()
else:
ret = set()
for key, value in local_variables.iteritems():
if id(value) in analyzed_stack:
## Let's avoid loops
continue
analyzed_stack.add(id(value))
if RE_PWD.search(key):
ret.add(str(value))
if isinstance(value, dict):
ret |= find_all_values_to_hide(value, analyzed_stack)
if '' in ret:
## Let's discard the empty string in case there is an empty password,
## or otherwise anything will be separated by '<*****>' in the output
## :-)
ret.remove('')
return ret
+
def get_pretty_traceback(req=None, exc_info=None, skip_frames=0):
"""
Given an optional request object and an optional exc_info,
returns a text string representing many details about an exception.
"""
if exc_info is None:
exc_info = sys.exc_info()
if exc_info[0]:
## We found an exception.
## We want to extract the name of the Exception
exc_name = exc_info[0].__name__
exc_value = str(exc_info[1])
filename, line_no, function_name = _get_filename_and_line(exc_info)
## Let's record when and where and what
www_data = "%(time)s -> %(name)s: %(value)s (%(file)s:%(line)s:%(function)s)" % {
'time': time.strftime("%Y-%m-%d %H:%M:%S"),
'name': exc_name,
'value': exc_value,
'file': filename,
'line': line_no,
'function': function_name }
## Let's retrieve contextual user related info, if any
try:
client_data = get_pretty_wide_client_info(req)
except Exception, err:
client_data = "Error in retrieving " \
"contextual information: %s" % err
## Let's extract the traceback:
tracestack_data_stream = StringIO()
print >> tracestack_data_stream, \
"\n** Traceback details \n"
traceback.print_exc(file=tracestack_data_stream)
stack = [frame[0] for frame in inspect.trace()]
#stack = [frame[0] for frame in inspect.getouterframes(exc_info[2])][skip_frames:]
try:
stack.reverse()
print >> tracestack_data_stream, \
"\n** Stack frame details"
values_to_hide = set()
for frame in stack:
try:
print >> tracestack_data_stream
print >> tracestack_data_stream, \
"Frame %s in %s at line %s" % (
frame.f_code.co_name,
frame.f_code.co_filename,
frame.f_lineno)
## Dereferencing f_locals
## See: http://utcc.utoronto.ca/~cks/space/blog/python/FLocalsAndTraceFunctions
local_values = frame.f_locals
try:
values_to_hide |= find_all_values_to_hide(local_values)
code = open(frame.f_code.co_filename).readlines()
first_line = max(1, frame.f_lineno-3)
last_line = min(len(code), frame.f_lineno+3)
print >> tracestack_data_stream, "-" * 79
for line in xrange(first_line, last_line+1):
code_line = code[line-1].rstrip()
if line == frame.f_lineno:
print >> tracestack_data_stream, \
"----> %4i %s" % (line, code_line)
else:
print >> tracestack_data_stream, \
" %4i %s" % (line, code_line)
print >> tracestack_data_stream, "-" * 79
except:
pass
for key, value in local_values.items():
print >> tracestack_data_stream, "\t%20s = " % key,
try:
value = repr(value)
except Exception, err:
## We shall gracefully accept errors when repr() of
## a value fails (e.g. when we are trying to repr() a
## variable that was not fully initialized as the
## exception was raised during its __init__ call).
value = "ERROR: when representing the value: %s" % (err)
try:
print >> tracestack_data_stream, \
_truncate_dynamic_string(value)
except:
print >> tracestack_data_stream, \
"<ERROR WHILE PRINTING VALUE>"
finally:
del frame
finally:
del stack
tracestack_data = tracestack_data_stream.getvalue()
for to_hide in values_to_hide:
## Let's hide passwords
tracestack_data = tracestack_data.replace(to_hide, '<*****>')
## Okay, start printing:
output = StringIO()
print >> output, "* %s" % www_data
print >> output, "\n** User details"
print >> output, client_data
if tracestack_data:
print >> output, tracestack_data
return output.getvalue()
else:
return ""
-def _is_pow_of_2(n):
- """
- Return True if n is a power of 2
- """
- while n > 1:
- if n % 2:
- return False
- n = n / 2
- return True
-
-def exception_should_be_notified(name, filename, line):
- """
- Return True if the exception should be notified to the admin.
- This actually depends on several considerations, e.g. wethever
- it has passed some since the last time this exception has been notified.
- """
- try:
- exc_log = run_sql("SELECT id,last_notified,counter,total FROM hstEXCEPTION WHERE name=%s AND filename=%s AND line=%s", (name, filename, line))
- if exc_log:
- exc_id, last_notified, counter, total = exc_log[0]
- delta = datetime.datetime.now() - last_notified
- counter += 1
- total += 1
- if (delta.seconds + delta.days * 86400) >= cfg['CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER']:
- run_sql("UPDATE hstEXCEPTION SET last_seen=NOW(), last_notified=NOW(), counter=1, total=%s WHERE id=%s", (total, exc_id))
- return True
- else:
- run_sql("UPDATE hstEXCEPTION SET last_seen=NOW(), counter=%s, total=%s WHERE id=%s", (counter, total, exc_id))
- return _is_pow_of_2(counter)
- else:
- run_sql("INSERT INTO hstEXCEPTION(name, filename, line, last_seen, last_notified, counter, total) VALUES(%s, %s, %s, NOW(), NOW(), 1, 1)", (name, filename, line))
- return True
- except:
- raise
- return True
-
-def get_pretty_notification_info(name, filename, line):
- """
- Return a sentence describing when this exception was already seen.
- """
- exc_log = run_sql("SELECT last_notified,last_seen,total FROM hstEXCEPTION WHERE name=%s AND filename=%s AND line=%s", (name, filename, line))
- if exc_log:
- last_notified, last_seen, total = exc_log[0]
- return "This exception has already been seen %s times\n last time it was seen: %s\n last time it was notified: %s\n" % (total, last_seen.strftime("%Y-%m-%d %H:%M:%S"), last_notified.strftime("%Y-%m-%d %H:%M:%S"))
- else:
- return "It is the first time this exception has been seen.\n"
def register_exception(stream='error',
req=None,
prefix='',
suffix='',
alert_admin=False,
subject=''):
"""
Log error exception to invenio.err and warning exception to invenio.log.
Errors will be logged together with client information (if req is
given).
Note: For sanity reasons, dynamic params such as PREFIX, SUFFIX and
local stack variables are checked for length, and only first 500
chars of their values are printed.
@param stream: 'error' or 'warning'
@param req: mod_python request
@param prefix: a message to be printed before the exception in
the log
@param suffix: a message to be printed before the exception in
the log
@param alert_admin: wethever to send the exception to the administrator via
email. Note this parameter is bypassed when
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS is set to a value different than 1
@param subject: overrides the email subject
@return: 1 if successfully wrote to stream, 0 if not
"""
try:
## Let's extract exception information
exc_info = sys.exc_info()
exc_name = exc_info[0].__name__
output = get_pretty_traceback(
req=req, exc_info=exc_info, skip_frames=2)
if output:
## Okay, start printing:
log_stream = StringIO()
email_stream = StringIO()
print >> email_stream, '\n',
## If a prefix was requested let's print it
if prefix:
#prefix = _truncate_dynamic_string(prefix)
print >> log_stream, prefix + '\n'
print >> email_stream, prefix + '\n'
print >> log_stream, output
print >> email_stream, output
## If a suffix was requested let's print it
if suffix:
#suffix = _truncate_dynamic_string(suffix)
print >> log_stream, suffix
print >> email_stream, suffix
log_text = log_stream.getvalue()
email_text = email_stream.getvalue()
if email_text.endswith('\n'):
email_text = email_text[:-1]
## Preparing the exception dump
- stream = stream=='error' and 'err' or 'log'
+ if stream=='error':
+ logger_method = current_app.logger.error
+ else:
+ logger_method = current_app.logger.info
## We now have the whole trace
written_to_log = False
try:
## Let's try to write into the log.
- open(os.path.join(cfg['CFG_LOGDIR'], 'invenio.' + stream), 'a').write(
- log_text)
+ logger_method(log_text)
written_to_log = True
except:
written_to_log = False
filename, line_no, function_name = _get_filename_and_line(exc_info)
## let's log the exception and see whether we should report it.
- pretty_notification_info = get_pretty_notification_info(exc_name, filename, line_no)
- if exception_should_be_notified(exc_name, filename, line_no) and (cfg['CFG_SITE_ADMIN_EMAIL_EXCEPTIONS'] > 1 or
- (alert_admin and cfg['CFG_SITE_ADMIN_EMAIL_EXCEPTIONS'] > 0) or
- not written_to_log):
+ log = HstEXCEPTION.get_or_create(exc_name, filename, line_no)
+ if log.exception_should_be_notified and (
+ cfg['CFG_SITE_ADMIN_EMAIL_EXCEPTIONS'] > 1 or
+ (alert_admin and
+ cfg['CFG_SITE_ADMIN_EMAIL_EXCEPTIONS'] > 0) or
+ not written_to_log):
## If requested or if it's impossible to write in the log
from invenio.ext.email import send_email
if not subject:
- subject = 'Exception (%s:%s:%s)' % (filename, line_no, function_name)
+ subject = 'Exception (%s:%s:%s)' % (
+ filename, line_no, function_name)
subject = '%s at %s' % (subject, cfg['CFG_SITE_URL'])
- email_text = "\n%s\n%s" % (pretty_notification_info, email_text)
+ email_text = "\n%s\n%s" % (log.pretty_notification_info,
+ email_text)
if not written_to_log:
email_text += """\
Note that this email was sent to you because it has been impossible to log
this exception into %s""" % os.path.join(cfg['CFG_LOGDIR'], 'invenio.' + stream)
send_email(
cfg['CFG_SITE_ADMIN_EMAIL'],
cfg['CFG_SITE_ADMIN_EMAIL'],
subject=subject,
content=email_text)
return 1
else:
return 0
except Exception, err:
print >> sys.stderr, "Error in registering exception to '%s': '%s'" % (
cfg['CFG_LOGDIR'] + '/invenio.' + stream, err)
return 0
-def raise_exception(exception_type = Exception,
- msg = '',
- stream='error',
- req=None,
- prefix='',
- suffix='',
- alert_admin=False,
- subject=''):
+def raise_exception(exception_type=Exception, msg='', stream='error',
+ req=None, prefix='', suffix='', alert_admin=False,
+ subject=''):
"""
Log error exception to invenio.err and warning exception to invenio.log.
Errors will be logged together with client information (if req is
given).
It does not require a previously risen exception.
Note: For sanity reasons, dynamic params such as PREFIX, SUFFIX and
local stack variables are checked for length, and only first 500
chars of their values are printed.
@param exception_type: exception type to be used internally
@param msg: error message
@param stream: 'error' or 'warning'
@param req: mod_python request
@param prefix: a message to be printed before the exception in
the log
@param suffix: a message to be printed before the exception in
the log
@param alert_admin: wethever to send the exception to the administrator via
email. Note this parameter is bypassed when
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS is set to a value different than 1
@param subject: overrides the email subject
@return: 1 if successfully wrote to stream, 0 if not
"""
try:
raise exception_type(msg)
except:
return register_exception(stream=stream,
req=req,
prefix=prefix,
suffix=suffix,
alert_admin=alert_admin,
subject=subject)
def send_error_report_to_admin(header, url, time_msg,
browser, client, error,
sys_error, traceback_msg):
"""
Sends an email to the admin with client info and tracestack
"""
from_addr = '%s Alert Engine <%s>' % (
cfg['CFG_SITE_NAME'], cfg['CFG_WEBALERT_ALERT_ENGINE_EMAIL'])
to_addr = cfg['CFG_SITE_ADMIN_EMAIL']
body = """
The following error was seen by a user and sent to you.
%(contact)s
%(header)s
%(url)s
%(time)s
%(browser)s
%(client)s
%(error)s
%(sys_error)s
%(traceback)s
Please see the %(logdir)s/invenio.err for traceback details.""" % {
'header': header,
'url': url,
'time': time_msg,
'browser': browser,
'client': client,
'error': error,
'sys_error': sys_error,
'traceback': traceback_msg,
'logdir': cfg['CFG_LOGDIR'],
'contact': "Please contact %s quoting the following information:" %
(cfg['CFG_SITE_SUPPORT_EMAIL'], )}
from invenio.ext.email import send_email
send_email(from_addr, to_addr, subject="Error notification", content=body)
+
def _get_filename_and_line(exc_info):
- """
- Return the filename, the line and the function_name where the exception happened.
- """
+ """Return the filename, the line and the function_name where
+ the exception happened."""
tb = exc_info[2]
exception_info = traceback.extract_tb(tb)[-1]
filename = os.path.basename(exception_info[0])
line_no = exception_info[1]
function_name = exception_info[2]
return filename, line_no, function_name
+
def _truncate_dynamic_string(val, maxlength=500):
"""
Return at most MAXLENGTH characters of VAL. Useful for
sanitizing dynamic variable values in the output.
"""
out = repr(val)
if len(out) > maxlength:
out = out[:maxlength] + ' [...]'
return out
def wrap_warn():
import warnings
from functools import wraps
def wrapper(showwarning):
@wraps(showwarning)
def new_showwarning(message=None, category=None, filename=None, lineno=None, file=None, line=None):
invenio_err = open(os.path.join(CFG_LOGDIR, 'invenio.err'), "a")
print >> invenio_err, "* %(time)s -> WARNING: %(category)s: %(message)s (%(file)s:%(line)s)\n" % {
'time': time.strftime("%Y-%m-%d %H:%M:%S"),
'category': category,
'message': message,
'file': filename,
'line': lineno}
print >> invenio_err, "** Traceback details\n"
traceback.print_stack(file=invenio_err)
print >> invenio_err, "\n"
return new_showwarning
warnings.showwarning = wrapper(warnings.showwarning)
diff --git a/invenio/ext/sqlalchemy/__init__.py b/invenio/ext/sqlalchemy/__init__.py
index c5c6e8198..34f3b33a6 100644
--- a/invenio/ext/sqlalchemy/__init__.py
+++ b/invenio/ext/sqlalchemy/__init__.py
@@ -1,203 +1,203 @@
# -*- coding: utf-8 -*-
#
## This file is part of Invenio.
## Copyright (C) 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
invenio.ext.sqlalchemy
----------------------
This module provides initialization and configuration for
`flask.ext.sqlalchemy` module.
"""
from .expressions import AsBINARY
from .types import JSONEncodedTextDict, MarshalBinary, PickleBinary
from .utils import get_model_type
import sqlalchemy
from flask.ext.sqlalchemy import SQLAlchemy as FlaskSQLAlchemy
from sqlalchemy import event
from sqlalchemy.pool import Pool
from sqlalchemy.ext.hybrid import hybrid_property, Comparator
from invenio.utils.hash import md5
#from invenio.base.utils import autodiscover_models
def _include_sqlalchemy(obj, engine=None):
#for module in sqlalchemy, sqlalchemy.orm:
# for key in module.__all__:
# if not hasattr(obj, key):
# setattr(obj, key,
# getattr(module, key))
if engine == 'mysql':
from sqlalchemy.dialects import mysql as engine_types
else:
from sqlalchemy import types as engine_types
setattr(obj, 'JSON', JSONEncodedTextDict)
setattr(obj, 'Char', engine_types.CHAR)
try:
setattr(obj, 'TinyText', engine_types.TINYTEXT)
except:
setattr(obj, 'TinyText', engine_types.TEXT)
setattr(obj, 'hybrid_property', hybrid_property)
try:
setattr(obj, 'Double', engine_types.DOUBLE)
except:
setattr(obj, 'Double', engine_types.FLOAT)
setattr(obj, 'Integer', engine_types.INTEGER)
setattr(obj, 'SmallInteger', engine_types.SMALLINT)
try:
setattr(obj, 'MediumInteger', engine_types.MEDIUMINT)
except:
setattr(obj, 'MediumInteger', engine_types.INT)
setattr(obj, 'BigInteger', engine_types.BIGINT)
try:
setattr(obj, 'TinyInteger', engine_types.TINYINT)
except:
setattr(obj, 'TinyInteger', engine_types.INT)
setattr(obj, 'Binary', sqlalchemy.types.LargeBinary)
setattr(obj, 'iBinary', sqlalchemy.types.LargeBinary)
setattr(obj, 'iLargeBinary', sqlalchemy.types.LargeBinary)
setattr(obj, 'iMediumBinary', sqlalchemy.types.LargeBinary)
if engine == 'mysql':
from .engines import mysql as dummy_mysql
# module = invenio.sqlalchemyutils_mysql
# for key in module.__dict__:
# setattr(obj, key,
# getattr(module, key))
def default_enum(f):
def decorated(*args, **kwargs):
kwargs['native_enum'] = engine == 'mysql' # False
return f(*args, **kwargs)
return decorated
obj.Enum.__init__ = default_enum(obj.Enum.__init__)
obj.AsBINARY = AsBINARY
obj.MarshalBinary = MarshalBinary
obj.PickleBinary = PickleBinary
## Overwrite :meth:`MutableDick.update` to detect changes.
from sqlalchemy.ext.mutable import MutableDict
def update_mutable_dict(self, *args, **kwargs):
super(MutableDict, self).update(*args, **kwargs)
self.changed()
MutableDict.update = update_mutable_dict
obj.MutableDict = MutableDict
class PasswordComparator(Comparator):
def __eq__(self, other):
return self.__clause_element__() == self.hash(other)
def hash(self, password):
if db.engine.name != 'mysql':
return md5(password).digest()
email = self.__clause_element__().table.columns.email
return db.func.aes_encrypt(email, password)
def autocommit_on_checkin(dbapi_con, con_record):
"""Calls autocommit on raw mysql connection for fixing bug in MySQL 5.5"""
try:
dbapi_con.autocommit(True)
except:
pass
#FIXME
- #from invenio.errorlib import register_exception
+ #from invenio.ext.logging import register_exception
#register_exception()
## Possibly register globally.
#event.listen(Pool, 'checkin', autocommit_on_checkin)
class SQLAlchemy(FlaskSQLAlchemy):
"""Database object."""
PasswordComparator = PasswordComparator
def init_app(self, app):
super(self.__class__, self).init_app(app)
engine = app.config.get('CFG_DATABASE_TYPE', 'mysql')
self.Model = get_model_type(self.Model)
if engine == 'mysql':
self.Model.__table_args__ = {'keep_existing': True,
'extend_existing': False,
'mysql_engine': 'MyISAM',
'mysql_charset': 'utf8'}
_include_sqlalchemy(self, engine=engine)
def __getattr__(self, name):
# This is only called when the normal mechanism fails, so in practice
# should never be called.
# It is only provided to satisfy pylint that it is okay not to
# raise E1101 errors in the client code.
# :see http://stackoverflow.com/a/3515234/780928
raise AttributeError("%r instance has no attribute %r" % (self, name))
def schemadiff(self, excludeTables=None):
from migrate.versioning import schemadiff
return schemadiff.getDiffOfModelAgainstDatabase(self.metadata,
self.engine,
excludeTables=excludeTables)
def apply_driver_hacks(self, app, info, options):
"""
This method is called before engine creation.
"""
# Don't forget to apply hacks defined on parent object.
super(self.__class__, self).apply_driver_hacks(app, info, options)
if info.drivername == 'mysql':
options.setdefault('execution_options', {'autocommit': True,
'use_unicode': False # , 'charset': 'utf8'
})
event.listen(Pool, 'checkin', autocommit_on_checkin)
db = SQLAlchemy()
"""
Provides access to :class:`~.SQLAlchemy` instance.
"""
def setup_app(app):
"""Setup SQLAlchemy extension."""
if 'SQLALCHEMY_DATABASE_URI' not in app.config:
from sqlalchemy.engine.url import URL
cfg = app.config
app.config['SQLALCHEMY_DATABASE_URI'] = URL(
cfg.get('CFG_DATABASE_TYPE', 'mysql'),
username=cfg.get('CFG_DATABASE_USER'),
password=cfg.get('CFG_DATABASE_PASS'),
host=cfg.get('CFG_DATABASE_HOST'),
database=cfg.get('CFG_DATABASE_NAME'),
port=cfg.get('CFG_DATABASE_PORT'),
)
## Let's initialize database.
db.init_app(app)
## Make sure that all tables are loaded in `db.metadata.tables`.
#autodiscover_models()
return app
diff --git a/invenio/legacy/bibcatalog/system_email.py b/invenio/legacy/bibcatalog/system_email.py
index c8d55ee44..1193c7e3a 100644
--- a/invenio/legacy/bibcatalog/system_email.py
+++ b/invenio/legacy/bibcatalog/system_email.py
@@ -1,174 +1,174 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Provide a "ticket" interface with Email.
This is a subclass of BibCatalogSystem
"""
import datetime
from time import mktime
import invenio.webuser
from invenio.shellutils import escape_shell_arg
from invenio.bibcatalog_system import BibCatalogSystem
from invenio.ext.email import send_email
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
EMAIL_SUBMIT_CONFIGURED = False
import invenio.config
if hasattr(invenio.config, 'CFG_BIBCATALOG_SYSTEM') and invenio.config.CFG_BIBCATALOG_SYSTEM == "EMAIL":
if hasattr(invenio.config, 'CFG_BIBCATALOG_SYSTEM_EMAIL_ADDRESS'):
EMAIL_SUBMIT_CONFIGURED = True
FROM_ADDRESS = invenio.config.CFG_SITE_SUPPORT_EMAIL
TO_ADDRESS = invenio.config.CFG_BIBCATALOG_SYSTEM_EMAIL_ADDRESS
class BibCatalogSystemEmail(BibCatalogSystem):
    """Ticketing backend that turns every operation into a plain email.

    Instead of talking to a real ticketing system, each submitted ticket,
    comment, re-assignment or attribute change is sent as an email from
    FROM_ADDRESS to TO_ADDRESS.  Ticket ids are generated locally from the
    current timestamp encoded in base 36.
    """
    #BIBCATALOG_RT_SERVER = "" #construct this by http://user:password@RT_URL

    def check_system(self, uid=None):
        """Return an error string if there are problems, '' otherwise."""
        ret = ''
        if not EMAIL_SUBMIT_CONFIGURED:
            ret = "Please configure bibcatalog email sending in CFG_BIBCATALOG_SYSTEM and CFG_BIBCATALOG_SYSTEM_EMAIL_ADDRESS"
        return ret

    def ticket_search(self, uid, recordid=-1, subject="", text="", creator="", owner="", \
                      date_from="", date_until="", status="", priority="", queue=""):
        """Not implemented."""
        raise NotImplementedError

    def ticket_submit(self, uid=None, subject="", recordid=-1, text="", queue="", priority="", owner="", requestor=""):
        """Create a ticket by sending an email.

        Returns the generated ticket id (a base36 string) when the email
        was sent successfully, otherwise None.
        """
        if not EMAIL_SUBMIT_CONFIGURED:
            # Warn but carry on: the send_email() call below will simply
            # fail if the configured addresses are unusable.
            register_exception(stream='warning',
                               subject='bibcatalog email not configured',
                               prefix="please configure bibcatalog email sending in CFG_BIBCATALOG_SYSTEM and CFG_BIBCATALOG_SYSTEM_EMAIL_ADDRESS")
        ticket_id = self._get_ticket_id()
        # Build the pseudo-header lines describing the ticket; empty
        # attributes contribute nothing to the message body.
        priorityset = ""
        queueset = ""
        requestorset = ""
        ownerset = ""
        recidset = " cf-recordID:" + escape_shell_arg(str(recordid)) + '\n'
        textset = ""
        subjectset = ""
        if subject:
            subjectset = 'ticket #' + ticket_id + ' - ' + escape_shell_arg(subject)
        if priority:
            priorityset = " priority:" + escape_shell_arg(str(priority)) + '\n'
        if queue:
            queueset = " queue:" + escape_shell_arg(queue) + '\n'
        if requestor:
            requestorset = " requestor:" + escape_shell_arg(requestor) + '\n'
        if owner:
            # Translate the Invenio user into their bibcatalog username
            # when one is configured in the user's preferences.
            ownerprefs = invenio.webuser.get_user_preferences(owner)
            if "bibcatalog_username" in ownerprefs:  # was has_key(): removed in Python 3
                owner = ownerprefs["bibcatalog_username"]
            ownerset = " owner:" + escape_shell_arg(owner) + '\n'
        textset = textset + ownerset + requestorset + recidset + queueset + priorityset + '\n'
        textset = textset + escape_shell_arg(text) + '\n'
        ok = send_email(fromaddr=FROM_ADDRESS, toaddr=TO_ADDRESS, subject=subjectset, header='Hello,\n\n', content=textset)
        if ok:
            return ticket_id
        return None

    def ticket_comment(self, uid, ticketid, comment):
        """Comment on the ticket with the given ticketid.

        Returns 1 when the email was sent successfully, 0 otherwise.
        """
        subjectset = 'ticket #' + ticketid + ' - Comment ...'
        textset = '...\n\n*Comment on ticket #' + ticketid + '\nComment:' + comment
        ok = send_email(fromaddr=FROM_ADDRESS, toaddr=TO_ADDRESS, subject=subjectset, header='Hello,\n\n', content=textset)
        if ok:
            return 1
        return 0

    def ticket_assign(self, uid, ticketid, to_user):
        """Request re-assignment of ticket ticketid to user to_user.

        Returns 1 when the email was sent successfully, 0 otherwise.
        """
        subjectset = 'ticket #' + ticketid + ' - Re-assign ...'
        textset = '...\n\n*Please re-assigning ticket #' + ticketid + ' to ' + to_user
        ok = send_email(fromaddr=FROM_ADDRESS, toaddr=TO_ADDRESS, subject=subjectset, header='Hello,\n\n', content=textset)
        if ok:
            return 1
        return 0

    def ticket_set_attribute(self, uid, ticketid, attribute, new_value):
        """Request setting attribute to new_value on ticket ticketid.

        Returns 1 when the email was sent successfully, 0 otherwise.
        """
        subjectset = 'ticket #' + ticketid + ' - Attribute Update ...'
        textset = '...\n\n*Please modify attribute:' + attribute + ' to:' + new_value + ' on ticket:' + ticketid
        ok = send_email(fromaddr=FROM_ADDRESS, toaddr=TO_ADDRESS, subject=subjectset, header='Hello,\n\n', content=textset)
        if ok:
            return 1
        return 0

    def ticket_get_attribute(self, uid, ticketid, attribute):
        """Not implemented."""
        raise NotImplementedError

    def ticket_get_info(self, uid, ticketid, attributes=None):
        """Not implemented."""
        raise NotImplementedError

    def _str_base(self, num, base, numerals='0123456789abcdefghijklmnopqrstuvwxyz'):
        """ Convert number to base (2 to 36) """
        if base < 2 or base > len(numerals):
            raise ValueError("str_base: base must be between 2 and %i" % len(numerals))
        if num == 0:
            return '0'
        if num < 0:
            sign = '-'
            num = -num
        else:
            sign = ''
        result = ''
        # Standard repeated-division digit extraction, most significant last.
        while num:
            result = numerals[num % (base)] + result
            num //= base
        return sign + result

    def _get_ticket_id(self):
        """ Return timestamp in seconds since the Epoch converted to base36 """
        now = datetime.datetime.now()
        # Microsecond precision keeps successive ids distinct.
        t = mktime(now.timetuple()) + 1e-6 * now.microsecond
        t_str = str("%.6f" % t)
        t1, t2 = t_str.split('.')
        t_str = t1 + t2
        #return base64.encodestring(t_str).strip()
        return self._str_base(int(t_str), 36)
diff --git a/invenio/legacy/bibcirculation/adminlib.py b/invenio/legacy/bibcirculation/adminlib.py
index c9132d470..4e25cba19 100644
--- a/invenio/legacy/bibcirculation/adminlib.py
+++ b/invenio/legacy/bibcirculation/adminlib.py
@@ -1,6234 +1,6234 @@
## Administrator interface for Bibcirculation
##
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
## """Invenio Bibcirculation Administrator Interface."""
from __future__ import division
"""
Invenio Bibcirculation Administrator.
The functions are positioned by grouping into logical
categories('User Pages', 'Loans, Returns and Loan requests',
'ILLs', 'Libraries', 'Vendors' ...)
These orders should be maintained and when necessary, improved
for readability, as and when additional methods are added.
When applicable, methods should be renamed, refactored and
appropriate documentation added.
"""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
import datetime, time, types
# Other Invenio imports
from invenio.config import \
CFG_SITE_LANG, \
CFG_SITE_URL, \
CFG_SITE_SECURE_URL, \
CFG_CERN_SITE
import invenio.access_control_engine as acce
from invenio.webpage import page
from invenio.webuser import getUid, page_not_authorized
from invenio.webstat import register_customevent
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.ext.email import send_email
from invenio.search_engine import perform_request_search, record_exists
from invenio.utils.url import create_html_link, create_url, redirect_to_url
from invenio.base.i18n import gettext_set_language
from invenio.config import \
CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, \
CFG_BIBCIRCULATION_ITEM_STATUS_ON_ORDER, \
CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, \
CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS, \
CFG_BIBCIRCULATION_ITEM_STATUS_UNDER_REVIEW, \
CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, \
CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED, \
CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING, \
CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING, \
CFG_BIBCIRCULATION_REQUEST_STATUS_DONE, \
CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED, \
CFG_BIBCIRCULATION_ILL_STATUS_NEW, \
CFG_BIBCIRCULATION_ILL_STATUS_ON_LOAN, \
CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN, \
CFG_BIBCIRCULATION_ACQ_STATUS_NEW, \
CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED, \
CFG_BIBCIRCULATION_PROPOSAL_STATUS_ON_ORDER, \
CFG_BIBCIRCULATION_PROPOSAL_STATUS_PUT_ASIDE, \
CFG_BIBCIRCULATION_PROPOSAL_STATUS_RECEIVED
# Bibcirculation imports
from invenio.bibcirculation_config import \
CFG_BIBCIRCULATION_TEMPLATES, CFG_BIBCIRCULATION_LIBRARIAN_EMAIL, \
CFG_BIBCIRCULATION_LOANS_EMAIL, CFG_BIBCIRCULATION_ILLS_EMAIL, \
CFG_BIBCIRCULATION_PROPOSAL_TYPE, CFG_BIBCIRCULATION_ACQ_STATUS
from invenio.bibcirculation_utils import book_title_from_MARC, \
update_status_if_expired, \
renew_loan_for_X_days, \
print_pending_hold_requests_information, \
print_new_loan_information, \
validate_date_format, \
generate_email_body, \
book_information_from_MARC, \
search_user, \
tag_all_requests_as_done, \
update_user_info_from_ldap, \
update_request_data, \
update_requests_statuses, \
has_date_format, \
generate_tmp_barcode, \
looks_like_dictionary
import invenio.bibcirculation_dblayer as db
import invenio.template
bc_templates = invenio.template.load('bibcirculation')
def is_adminuser(req):
    """Check whether the request comes from a registered administrator.

    Returns the (auth_code, auth_message) pair produced by the access
    control engine for the 'runbibcirculation' action.
    """
    auth = acce.acc_authorize_action(req, "runbibcirculation")
    return auth
def mustloginpage(req, message):
    """Render a 'not authorized, please log in' page with the admin navtrail."""
    base = CFG_SITE_SECURE_URL
    navtrail = ('<a class="navtrail" href="%s/admin/">'
                'Admin Area</a> &gt; '
                '<a class="navtrail" href="%s/admin/bibcirculation/">'
                'BibCirculation Admin</a> ' % (base, base))
    return page_not_authorized(req=req, text=message, navtrail=navtrail)
def load_template(template):
    """
    Load a letter/notification template from
    bibcirculation_config.py.

    @type template: string.
    @param template: template that will be used.

    @return: template(string)
    """
    # Map the short name used by callers to the key under which the
    # letter text is stored in CFG_BIBCIRCULATION_TEMPLATES.  Unknown
    # names fall back to the empty template, exactly as the previous
    # if/elif chain did.
    template_keys = {
        "overdue_letter": 'OVERDUE',
        "reminder": 'REMINDER',
        "notification": 'NOTIFICATION',
        "ill_received": 'ILL_RECEIVED',
        "ill_recall1": 'ILL_RECALL1',
        "ill_recall2": 'ILL_RECALL2',
        "ill_recall3": 'ILL_RECALL3',
        "claim_return": 'SEND_RECALL',
        "proposal_notification": 'PROPOSAL_NOTIFICATION',
        "proposal_acceptance": 'PROPOSAL_ACCEPTANCE_NOTIFICATION',
        "proposal_refusal": 'PROPOSAL_REFUSAL_NOTIFICATION',
        "purchase_notification": 'PURCHASE_NOTIFICATION',
        "purchase_received_tid": 'PURCHASE_RECEIVED_TID',
        "purchase_received_cash": 'PURCHASE_RECEIVED_CASH',
    }
    return CFG_BIBCIRCULATION_TEMPLATES[template_keys.get(template, 'EMPTY')]
def index(req, ln=CFG_SITE_LANG):
    """Display the BibCirculation administration start page."""
    admin_uid = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail"'
                ' href="%s/help/admin">Admin Area'
                '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("BibCirculation Admin"),
                uid=admin_uid,
                req=req,
                body=bc_templates.tmpl_index(ln=ln), language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
###
### Loans, Loan Requests, Loan Returns related templates.
###
def loan_on_desk_step1(req, key, string, ln=CFG_SITE_LANG):
    """
    Step 1/4 of loan procedure.
    Search a user/borrower and return a list with all the possible results.

    @type key: string.
    @param key: attribute that will be considered during the search. Can be 'name',
                'email' or 'ccid/id'.

    @type string: string.
    @param string: keyword used during the search.

    @return: list of potential borrowers.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    infos = []
    _ = gettext_set_language(ln)

    # A key with no search string is an empty query: re-display the
    # search form with an error message and stop here.
    if key and not string:
        infos.append(_('Empty string. Please, try again.'))
        body = bc_templates.tmpl_loan_on_desk_step1(result=None, key=key,
                                                    string=string, infos=infos,
                                                    ln=ln)
        navtrail_previous_links = '<a class="navtrail" ' \
                                  'href="%s/help/admin">Admin Area' \
                                  '</a>' % (CFG_SITE_SECURE_URL,)
        return page(title=_("Loan on desk"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    result = search_user(key, string)
    borrowers_list = []

    if len(result) == 0 and key:
        # No borrower matched.  At CERN borrowers come from LDAP, so
        # suggest searching by CCID; elsewhere offer a registration link.
        if CFG_CERN_SITE:
            infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID."))
        else:
            new_borrower_link = create_html_link(CFG_SITE_SECURE_URL +
                                '/admin2/bibcirculation/add_new_borrower_step1',
                                {'ln': ln}, _("Register new borrower."))
            message = _("0 borrowers found.") + ' ' + new_borrower_link
            infos.append(message)
    elif len(result) == 1:
        # Exactly one match: skip the selection list, go straight to step 2.
        return loan_on_desk_step2(req, result[0][0], ln)
    else:
        # Several matches: fetch the full borrower record for each so the
        # librarian can pick the right one.
        for user in result:
            borrower_data = db.get_borrower_data_by_id(user[0])
            borrowers_list.append(borrower_data)

    body = bc_templates.tmpl_loan_on_desk_step1(result=borrowers_list,
                                                key=key,
                                                string=string,
                                                infos=infos,
                                                ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    return page(title=_("Circulation management"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def loan_on_desk_step2(req, user_id, ln=CFG_SITE_LANG):
    """
    Step 2/4 of the loan procedure: display the selected borrower's details.

    @type user_id: integer
    @param user_id: borrower identifier (primary key of crcBORROWER).
    """
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    admin_uid = getUid(req)

    navtrail = '<a class="navtrail" ' \
               'href="%s/help/admin">Admin Area' \
               '</a>' % (CFG_SITE_SECURE_URL,)
    page_body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
                                                     infos=[],
                                                     ln=ln)

    return page(title=_("Circulation management"),
                uid=admin_uid,
                req=req,
                body=page_body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def loan_on_desk_step3(req, user_id, list_of_barcodes, ln=CFG_SITE_LANG):
    """
    Step 3/4 of loan procedure.
    Checks that the barcodes exist and that there are no request on these records.
    Lets the librarian change the due dates and add notes.

    @type user_id: integer
    @param user_id: identify the borrower. It is also the primary key of
                    the table crcBORROWER.

    @type list_of_barcodes: list
    @param list_of_barcodes: list of strings with the barcodes
                             introduced by the librarian with the barcode reader
    """
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    _ = gettext_set_language(ln)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    infos = []
    list_of_books = []

    # to avoid duplicates
    aux = []
    for bc in list_of_barcodes:
        if bc not in aux:
            aux.append(bc)
    list_of_barcodes = aux

    # Validate each barcode in turn.  Any hard error (unknown barcode,
    # already on loan, no borrower selected) rebuilds `body` as an
    # earlier step carrying the message; note `body` is simply
    # overwritten on each iteration, so the last barcode decides which
    # page is shown when several problems occur.
    for value in list_of_barcodes:
        recid = db.get_id_bibrec(value)
        loan_id = db.is_item_on_loan(value)
        item_description = db.get_item_description(value)

        if recid is None:
            infos.append(_('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'} + ' ' + _('Please, try again.'))
            body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
                                                        infos=infos,
                                                        ln=ln)
        elif loan_id:
            # NOTE(review): unlike its siblings this message is not passed
            # through _() — presumably an oversight; left unchanged here.
            infos.append('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on a loan. Cannot be checked out.' % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
            body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
                                                        infos=infos,
                                                        ln=ln)
        elif user_id is None:
            infos.append(_('You must select one borrower.'))
            body = bc_templates.tmpl_loan_on_desk_step1(result=None,
                                                        key='',
                                                        string='',
                                                        infos=infos,
                                                        ln=ln)
        else:
            # Barcode is valid and available.  The checks below only add
            # warnings; the loan can still proceed.
            queue = db.get_queue_request(recid, item_description)
            (library_id, location) = db.get_lib_location(value)
            tup = (recid, value, library_id, location)
            list_of_books.append(tup)
            book_details = db.get_item_info(value)
            item_status = book_details[7]
            if item_status != CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF:
                message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s status is %(x_strong_tag_open)s%(x_status)s%(x_strong_tag_close)s") % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>', 'x_status': item_status}
                infos.append(message)
            if CFG_CERN_SITE:
                # Warn when the item belongs to a non-main library.
                library_type = db.get_library_type(library_id)
                if library_type != CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN:
                    library_name = db.get_library_name(library_id)
                    message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s location is %(x_strong_tag_open)s%(x_location)s%(x_strong_tag_close)s") % {'x_barcode': value, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>', 'x_location': library_name}
                    infos.append(message)
            if len(queue) != 0 and queue[0][0] != user_id:
                # Someone else has a pending request on this title.
                message = _("Another user is waiting for the book: %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s. \n\n If you want continue with this loan choose %(x_strong_tag_open)s[Continue]%(x_strong_tag_close)s.") % {'x_title': book_title_from_MARC(recid), 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
                infos.append(message)
            body = bc_templates.tmpl_loan_on_desk_step3(user_id=user_id,
                                                        list_of_books=list_of_books,
                                                        infos=infos, ln=ln)

    if list_of_barcodes == []:
        infos.append(_('Empty barcode.') + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
                                                    infos=infos,
                                                    ln=ln)

    # No messages at all means every barcode is clean: skip the manual
    # confirmation page and register the loans directly with default
    # due dates.
    if infos == []:
        # shortcut to simplify loan process
        due_dates = []
        for bc in list_of_barcodes:
            due_dates.append(renew_loan_for_X_days(bc))
        return loan_on_desk_step4(req, list_of_barcodes, user_id,
                                  due_dates, None, ln)
    else:
        return page(title=_("Circulation management"),
                    uid=id_user,
                    req=req,
                    body=body,
                    metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_SECURE_URL,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)
def loan_on_desk_step4(req, list_of_barcodes, user_id,
                       due_date, note, ln=CFG_SITE_LANG):
    """
    Step 4/4 of loan procedure.
    Checks that items are not on loan and that the format of
    the dates is correct and creates the loans

    @type user_id: integer
    @param user_id: identify the borrower. It is also the primary key of
                    the table crcBORROWER.

    @type list_of_barcodes: list
    @param list_of_barcodes: list of strings with the barcodes
                             introduced by the librarian with the barcode reader

    @type due_date: list.
    @param due_date: list of due dates (one per barcode, same order).

    @type note: string.
    @param note: note about the new loan.

    @return: page with the list 'Last Loans'
    """
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    _ = gettext_set_language(ln)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    infos = []

    #loaned_on = datetime.date.today()

    #Check if one of the given items is on loan.
    on_loan = []
    for barcode in list_of_barcodes:
        is_on_loan = db.is_item_on_loan(barcode)
        if is_on_loan:
            on_loan.append(barcode)

    # Any item already on loan aborts the whole batch: back to step 1.
    if len(on_loan) != 0:
        message = _("The items with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s are already on loan.") % {'x_barcode': on_loan, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
        infos.append(message)

        body = bc_templates.tmpl_loan_on_desk_step1(result=None, key='',
                                                    string='', infos=infos,
                                                    ln=ln)
        return page(title=_("Loan on desk"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    # validate the period of interest given by the admin
    for date in due_date:
        if validate_date_format(date) is False:
            infos = []
            message = _("The given due date %(x_strong_tag_open)s%(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': date, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
            infos.append(message)

            # Rebuild the book list so step 3 can be shown again with
            # the invalid date flagged.
            list_of_books = []
            for bc in list_of_barcodes:
                recid = db.get_id_bibrec(bc)
                (library_id, location) = db.get_lib_location(bc)
                tup = (recid, bc, library_id, location)
                list_of_books.append(tup)

            body = bc_templates.tmpl_loan_on_desk_step3(user_id=user_id,
                                                        list_of_books=list_of_books,
                                                        infos=infos, ln=ln)
            return page(title=_("Circulation management"),
                        uid=id_user,
                        req=req,
                        body=body, language=ln,
                        navtrail=navtrail_previous_links,
                        lastupdated=__lastupdated__)

    #if borrower_id == None:
    #    db.new_borrower(ccid, name, email, phone, address, mailbox, '')
    #    borrower_id = db.get_borrower_id_by_email(email)

    # All barcodes free and all dates valid: register one loan per item.
    for i in range(len(list_of_barcodes)):
        note_format = {}
        if note:
            note_format[time.strftime("%Y-%m-%d %H:%M:%S")] = str(note)
        barcode = list_of_barcodes[i]
        recid = db.get_id_bibrec(barcode)
        db.new_loan(user_id, recid, barcode, due_date[i],
                    CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                    'normal', note_format)

        # Duplicate requests on items belonging to a single record has been disabled.
        db.tag_requests_as_done(user_id, barcode)
        # tag_all_requests_as_done(barcode, user_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)

        infos.append(_("A loan for the item %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s, with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s, has been registered with success.") % {'x_title': book_title_from_MARC(recid), 'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})

    infos.append(_("You could enter the barcode for this user's next loan, if any."))
    body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id,
                                                infos=infos, ln=ln)
    return page(title=_("Circulation management"),
                uid=id_user,
                req=req,
                body=body,
                metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_SECURE_URL,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def loan_on_desk_confirm(req, barcode=None, borrower_id=None, ln=CFG_SITE_LANG):
    """
    *** Obsolete and unmantained function ***
    Confirm the return of an item.

    @type barcode: string.
    @param barcode: item identifier (primary key of crcITEM).

    @type borrower_id: integer.
    @param borrower_id: borrower identifier (primary key of crcBORROWER).
    """
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    admin_uid = getUid(req)

    confirm_result = db.loan_on_desk_confirm(barcode, borrower_id)
    page_body = bc_templates.tmpl_loan_on_desk_confirm(result=confirm_result,
                                                       barcode=barcode,
                                                       borrower_id=borrower_id,
                                                       ln=ln)
    navtrail = '<a class="navtrail" ' \
               'href="%s/help/admin">Admin Area' \
               '</a>' % (CFG_SITE_SECURE_URL,)

    return page(title=_("Loan on desk confirm"),
                uid=admin_uid,
                req=req,
                body=page_body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def register_new_loan(req, barcode, borrower_id,
                      request_id, new_note, print_data, ln=CFG_SITE_LANG):
    """
    Register a new loan. This function is from the "Create Loan" pages.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key
                       of the table crcLOANREQUEST.

    @type new_note: string.
    @param new_note: associate a note to this loan.

    @type print_data: string.
    @param print_data: print the information about this loan.

    @return: new loan
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    has_recid = db.get_id_bibrec(barcode)
    loan_id = db.is_item_on_loan(barcode)

    recid = db.get_request_recid(request_id)
    req_barcode = db.get_requested_barcode(request_id)
    req_description = db.get_item_description(req_barcode)
    # Get all the items belonging to the record whose
    # description is the same.
    list_of_barcodes = db.get_barcodes(recid, req_description)

    infos = []

    if print_data == 'true':
        # 'Print' button pressed: emit the printable loan slip instead.
        return print_new_loan_information(req, ln)
    else:
        # Validation chain: unknown barcode, already on loan, or barcode
        # not matching the requested record each re-display the Create
        # Loan form with an explanatory message.
        if has_recid is None:
            message = _('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'} + ' ' + _('Please, try again.')
            infos.append(message)
            borrower = db.get_borrower_details(borrower_id)
            title = _("Create Loan")
            body = bc_templates.tmpl_create_loan(request_id=request_id,
                                                 recid=recid,
                                                 borrower=borrower,
                                                 infos=infos,
                                                 ln=ln)
        elif loan_id:
            infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
            borrower = db.get_borrower_details(borrower_id)
            title = _("Create Loan")
            body = bc_templates.tmpl_create_loan(request_id=request_id,
                                                 recid=recid,
                                                 borrower=borrower,
                                                 infos=infos,
                                                 ln=ln)
        elif barcode not in list_of_barcodes:
            infos.append(_('The given barcode "%(x_barcode)s" does not correspond to requested item.') % {'x_barcode': barcode})
            borrower = db.get_borrower_details(borrower_id)
            title = _("Create Loan")
            body = bc_templates.tmpl_create_loan(request_id=request_id,
                                                 recid=recid,
                                                 borrower=borrower,
                                                 infos=infos,
                                                 ln=ln)
        else:
            # Everything checks out: create the loan, close the request
            # and refresh the item/request statuses.
            recid = db.get_id_bibrec(barcode)
            #loaned_on = datetime.date.today()
            due_date = renew_loan_for_X_days(barcode)

            if new_note:
                note_format = '[' + time.ctime() + '] ' + new_note + '\n'
            else:
                note_format = ''

            last_id = db.new_loan(borrower_id, recid, barcode,
                                  due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                                  'normal', note_format)

            # register event in webstat
            try:
                register_customevent("loanrequest", [request_id, last_id])
            except:
                register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")

            tag_all_requests_as_done(barcode, borrower_id)
            db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
            db.update_loan_request_status(CFG_BIBCIRCULATION_REQUEST_STATUS_DONE,
                                          request_id)
            db.update_request_barcode(barcode, request_id)
            update_requests_statuses(barcode)

            result = db.get_all_loans(20)
            infos.append(_('A new loan has been registered with success.'))

            title = _("Current loans")
            body = bc_templates.tmpl_all_loans(result=result,
                                               infos=infos,
                                               ln=ln)

        navtrail_previous_links = '<a class="navtrail" ' \
                                  'href="%s/help/admin">Admin Area' \
                                  '</a>' % (CFG_SITE_SECURE_URL,)

        return page(title=title,
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)
def create_loan(req, request_id, recid, borrower_id, ln=CFG_SITE_LANG):
    """
    Display the "Create Loan" form for a given hold request.

    @type request_id: integer.
    @param request_id: hold request identifier (primary key of crcLOANREQUEST).

    @type recid: integer.
    @param recid: record identifier (primary key of bibrec).

    @type borrower_id: integer.
    @param borrower_id: borrower identifier (primary key of crcBORROWER).
    """
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    admin_uid = getUid(req)

    borrower = db.get_borrower_details(borrower_id)
    navtrail = '<a class="navtrail" ' \
               'href="%s/help/admin">Admin Area' \
               '</a>' % (CFG_SITE_SECURE_URL,)
    page_body = bc_templates.tmpl_create_loan(request_id=request_id,
                                              recid=recid,
                                              borrower=borrower,
                                              infos=[],
                                              ln=ln)

    return page(title=_("Create Loan"),
                uid=admin_uid,
                req=req,
                body=page_body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def make_new_loan_from_request(req, check_id, barcode, ln=CFG_SITE_LANG):
    """
    Turns a request into a loan.

    @type check_id: integer.
    @param check_id: identify the hold request. It is also the primary key
                     of the table crcLOANREQUEST.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    infos = []

    recid = db.get_request_recid(check_id)
    borrower_id = db.get_request_borrower_id(check_id)
    borrower_info = db.get_borrower_details(borrower_id)

    due_date = renew_loan_for_X_days(barcode)
    if db.is_item_on_loan(barcode):
        # NOTE(review): this message is appended right before a redirect
        # that discards `infos`, so it is never shown to the user; it is
        # also not wrapped in _() unlike the sibling messages.
        infos.append('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.' % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        return redirect_to_url(req,
            '%s/admin2/bibcirculation/all_loans?ln=%s&msg=ok' % (CFG_SITE_SECURE_URL, ln))
    else:
        # Create the loan and refresh item/request bookkeeping.
        db.new_loan(borrower_id, recid, barcode, due_date,
                    CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, 'normal', '')
        infos.append(_('A new loan has been registered with success.'))

        #try:
        #    register_customevent("baskets", ["display", "", user_str])
        #except:
        #    register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")

        tag_all_requests_as_done(barcode, borrower_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a> &gt; <a class="navtrail" ' \
                              'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                              'Circulation Management' \
                              '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    body = bc_templates.tmpl_register_new_loan(borrower_info=borrower_info,
                                               infos=infos,
                                               recid=recid,
                                               ln=ln)

    return page(title=_("New Loan"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def loan_return(req, ln=CFG_SITE_LANG):
    """Show the form used to register the return of an item."""
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    admin_uid = getUid(req)

    navtrail = '<a class="navtrail" ' \
               'href="%s/help/admin">Admin Area' \
               '</a> &gt; <a class="navtrail" ' \
               'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
               'Circulation Management' \
               '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    return page(title=_("Loan return"),
                uid=admin_uid,
                req=req,
                body=bc_templates.tmpl_loan_return(infos=[], ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def loan_return_confirm(req, barcode, ln=CFG_SITE_LANG):
    """
    Performs the return of a loan and displays a confirmation page.
    In case the book is requested, it is possible to select a request
    and make a loan from it (make_new_loan_from_request)

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    infos = []
    _ = gettext_set_language(ln)

    recid = db.get_id_bibrec(barcode)
    loan_id = db.is_item_on_loan(barcode)

    # Unknown barcode or item not on loan: redisplay the form with an
    # error message, without touching the database.
    if recid is None:
        infos.append(_('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'} + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)
    elif loan_id is None:
        message = _("The item the with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is not on loan. Please, try again.") % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
        infos.append(message)
        body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)
    else:
        library_id = db.get_item_info(barcode)[1]
        if CFG_CERN_SITE:
            # Warn when the returned item belongs to a non-main library.
            library_type = db.get_library_type(library_id)
            if library_type != CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN:
                library_name = db.get_library_name(library_id)
                message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s location is %(x_strong_tag_open)s%(x_location)s%(x_strong_tag_close)s") % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>', 'x_location': library_name}
                infos.append(message)

        # Perform the actual return: item goes back on shelf, the loan is
        # closed, and any waiting requests are re-evaluated.
        borrower_id = db.get_borrower_id(barcode)
        borrower_name = db.get_borrower_name(borrower_id)

        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, barcode)
        db.return_loan(barcode)

        update_requests_statuses(barcode)

        # Pending requests on the same record/description are offered on
        # the confirmation page so a new loan can be made right away.
        description = db.get_item_description(barcode)
        result = db.get_pending_loan_request(recid, description)

        body = bc_templates.tmpl_loan_return_confirm(
                                            infos=infos,
                                            borrower_name=borrower_name,
                                            borrower_id=borrower_id,
                                            recid=recid,
                                            barcode=barcode,
                                            return_date=datetime.date.today(),
                                            result=result,
                                            ln=ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    return page(title=_("Loan return"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def claim_book_return(req, borrower_id, recid, loan_id,
                      template, ln=CFG_SITE_LANG):
    """
    Prepare the notification email claiming the return of an item.

    borrower_id: borrower identifier (primary key of crcBORROWER).
    recid:       record identifier (primary key of bibrec).
    template:    name of the letter template to load.
    """
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    admin_uid = getUid(req)

    email_body = generate_email_body(load_template(template), loan_id)
    recipient = db.get_borrower_email(borrower_id)
    subject = book_title_from_MARC(int(recid))

    page_body = bc_templates.tmpl_borrower_notification(email=recipient,
                                      subject=subject,
                                      email_body=email_body,
                                      borrower_id=borrower_id,
                                      from_address=CFG_BIBCIRCULATION_LOANS_EMAIL,
                                      ln=ln)
    navtrail = '<a class="navtrail" ' \
               'href="%s/help/admin">Admin Area' \
               '</a>' % (CFG_SITE_SECURE_URL,)

    return page(title=_("Claim return"),
                uid=admin_uid,
                req=req,
                body=page_body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def change_due_date_step1(req, barcode, borrower_id, ln=CFG_SITE_LANG):
    """
    Change the due date of a loan, step1.

    barcode: identify the item. It is the primary key of the table
             crcITEM; used to look up the current loan.
    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    """
    # Docstring fixed: it used to document a 'loan_id' parameter that is
    # not in the signature; the loan id is derived from the barcode below.
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    loan_id = db.get_current_loan_id(barcode)
    loan_details = db.get_loan_infos(loan_id)

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_change_due_date_step1(loan_details=loan_details,
                                                   loan_id=loan_id,
                                                   borrower_id=borrower_id,
                                                   ln=ln)
    # The jQuery UI stylesheet is needed by the date picker on this page.
    # (Dead commented-out variant of this link removed.)
    return page(title=_("Change due date"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                metaheaderadd='<link rel="stylesheet" href="%s/img/jquery-ui.css" '
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def change_due_date_step2(req, new_due_date, loan_id, borrower_id,
                          ln=CFG_SITE_LANG):
    """
    Change the due date of a loan, step2.

    new_due_date: the new due date to store.
    loan_id: identify a loan. It is the primery key of the table
             crcLOAN.
    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # Persist the new due date, then refresh the loan's expiry status.
    db.update_due_date(loan_id, new_due_date)
    update_status_if_expired(loan_id)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    confirmation = bc_templates.tmpl_change_due_date_step2(
        new_due_date=new_due_date,
        borrower_id=borrower_id,
        ln=ln)

    return page(title=_("Change due date"),
                uid=id_user,
                req=req,
                body=confirmation, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def place_new_request_step1(req, barcode, recid, key, string, ln=CFG_SITE_LANG):
    """
    Place a new request from the item's page, step1.

    barcode: identify the item. It is the primary key of the table
             crcITEM.
    recid: identify the record. It is also the primary key of
           the table bibrec.
    key: search field.
    string: search pattern.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    recid = db.get_id_bibrec(barcode)
    infos = []

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    if key and not string:
        # A search field was chosen but no pattern was typed in.
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_place_new_request_step1(result=None,
                                                         key=key,
                                                         string=string,
                                                         barcode=barcode,
                                                         recid=recid,
                                                         infos=infos,
                                                         ln=ln)
        return page(title=_("New request"),
                    uid=id_user,
                    req=req,
                    body=body,
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    result = search_user(key, string)
    borrowers_list = [db.get_borrower_data_by_id(user[0]) for user in result]

    if not result and key:
        if CFG_CERN_SITE:
            infos.append(_("0 borrowers found.") + ' ' + _("Search by CCID."))
        else:
            new_borrower_link = create_html_link(CFG_SITE_SECURE_URL +
                '/admin2/bibcirculation/add_new_borrower_step1',
                {'ln': ln}, _("Register new borrower."))
            infos.append(_("0 borrowers found.") + ' ' + new_borrower_link)

    if len(result) == 1:
        # Exactly one match: skip the borrower-selection screen.
        return place_new_request_step2(req, barcode, recid,
                                       borrowers_list[0], ln)

    body = bc_templates.tmpl_place_new_request_step1(result=borrowers_list,
                                                     key=key,
                                                     string=string,
                                                     barcode=barcode,
                                                     recid=recid,
                                                     infos=infos,
                                                     ln=ln)
    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def place_new_request_step2(req, barcode, recid, user_info, ln=CFG_SITE_LANG):
    """
    Place a new request from the item's page, step2.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type user_info: list.
    @param user_info: information of the user/borrower who was selected.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []

    body = bc_templates.tmpl_place_new_request_step2(barcode=barcode,
                                                     recid=recid,
                                                     user_info=user_info,
                                                     infos=infos,
                                                     ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    # CONSISTENCY FIX: language=ln was missing from this page() call while
    # every sibling handler passes it; without it the page ignored the
    # requested language.  The jQuery UI stylesheet feeds the date picker.
    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                metaheaderadd="<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_SECURE_URL,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def place_new_request_step3(req, barcode, recid, user_info,
                            period_from, period_to, ln=CFG_SITE_LANG):
    """
    Place a new request from the item's page, step3: register the request.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type user_info: list.
    @param user_info: (id, ccid, name, email, phone, address, mailbox) of
                      the selected user/borrower.
    @type period_from: string.
    @param period_from: beginning of the period of interest.
    @type period_to: string.
    @param period_to: end of the period of interest.
    @return: new request.
    """
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    (_id, ccid, name, email, phone, address, mailbox) = user_info

    # Validate the period of interest given by the admin.
    if validate_date_format(period_from) is False:
        infos = []
        infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_from, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        body = bc_templates.tmpl_place_new_request_step2(barcode=barcode,
                                                         recid=recid,
                                                         user_info=user_info,
                                                         infos=infos,
                                                         ln=ln)
        return page(title=_("New request"),
                    uid=id_user,
                    req=req,
                    body=body,
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    elif validate_date_format(period_to) is False:
        infos = []
        infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_to, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        body = bc_templates.tmpl_place_new_request_step2(barcode=barcode,
                                                         recid=recid,
                                                         user_info=user_info,
                                                         infos=infos,
                                                         ln=ln)
        # BUGFIX: this branch used to fall through and register the request
        # even though the 'To' date was invalid.  Return the error page
        # instead, mirroring the 'From' date branch above.
        return page(title=_("New request"),
                    uid=id_user,
                    req=req,
                    body=body,
                    language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    # Register request, creating the borrower record on the fly if the
    # email address is not known yet.
    borrower_id = db.get_borrower_id_by_email(email)
    if borrower_id is None:
        db.new_borrower(ccid, name, email, phone, address, mailbox, '')
        borrower_id = db.get_borrower_id_by_email(email)

    req_id = db.new_hold_request(borrower_id, recid, barcode,
                                 period_from, period_to,
                                 CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
    pending_request = update_requests_statuses(barcode)

    if req_id == pending_request:
        # The new request became the pending one: notify both the library
        # staff and the borrower by email.
        (title, year, author,
         isbn, publisher) = book_information_from_MARC(int(recid))
        details = db.get_loan_request_details(req_id)
        if details:
            library = details[3]
            location = details[4]
            request_date = details[7]
        else:
            location = ''
            library = ''
            request_date = ''

        link_to_holdings_details = CFG_SITE_URL + \
                                   '/record/%s/holdings' % str(recid)
        subject = _('New request')
        message = load_template('notification')
        message = message % (name, ccid, email, address, mailbox, title,
                             author, publisher, year, isbn, location, library,
                             link_to_holdings_details, request_date)

        send_email(fromaddr=CFG_BIBCIRCULATION_LIBRARIAN_EMAIL,
                   toaddr=CFG_BIBCIRCULATION_LOANS_EMAIL,
                   subject=subject,
                   content=message,
                   header='',
                   footer='',
                   attempt_times=1,
                   attempt_sleeptime=10
                   )
        send_email(fromaddr=CFG_BIBCIRCULATION_LIBRARIAN_EMAIL,
                   toaddr=email,
                   subject=subject,
                   content=message,
                   header='',
                   footer='',
                   attempt_times=1,
                   attempt_sleeptime=10
                   )

    body = bc_templates.tmpl_place_new_request_step3(ln=ln)

    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def place_new_loan_step1(req, barcode, recid, key, string, ln=CFG_SITE_LANG):
    """
    Place a new loan from the item's page, step1.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type key: string.
    @param key: search field.
    @type string: string.
    @param string: search pattern.
    @return: list of users/borrowers.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    recid = db.get_id_bibrec(barcode)
    infos = []

    if key and not string:
        # A search field was selected but the pattern is empty.
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_place_new_loan_step1(result=None,
                                                      key=key,
                                                      string=string,
                                                      barcode=barcode,
                                                      recid=recid,
                                                      infos=infos,
                                                      ln=ln)
        navtrail_previous_links = '<a class="navtrail" ' \
                                  'href="%s/help/admin">Admin Area' \
                                  '</a>' % (CFG_SITE_SECURE_URL,)
        # NOTE: a second, redundant getUid/is_adminuser check used to run
        # here; the authorization check at the top already covers it.
        return page(title=_("New loan"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    result = search_user(key, string)
    borrowers_list = []

    if len(result) == 0 and key:
        if CFG_CERN_SITE:
            infos.append(_("0 borrowers found.") + ' ' + _("Search by CCID."))
        else:
            new_borrower_link = create_html_link(CFG_SITE_SECURE_URL +
                                '/admin2/bibcirculation/add_new_borrower_step1',
                                {'ln': ln}, _("Register new borrower."))
            message = _("0 borrowers found.") + ' ' + new_borrower_link
            infos.append(message)
    else:
        for user in result:
            borrower_data = db.get_borrower_data_by_id(user[0])
            borrowers_list.append(borrower_data)

    body = bc_templates.tmpl_place_new_loan_step1(result=borrowers_list,
                                                  key=key,
                                                  string=string,
                                                  barcode=barcode,
                                                  recid=recid,
                                                  infos=infos,
                                                  ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    return page(title=_("New loan"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def place_new_loan_step2(req, barcode, recid, user_info, ln=CFG_SITE_LANG):
    """
    Place a new loan from the item's page, step2.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type user_info: list.
    @param user_info: information of the user/borrower who was selected.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    loan_form = bc_templates.tmpl_place_new_loan_step2(barcode=barcode,
                                                       recid=recid,
                                                       user_info=user_info,
                                                       ln=ln)

    return page(title=_("New loan"),
                uid=id_user,
                req=req,
                body=loan_form, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def place_new_loan_step3(req, barcode, recid, ccid, name, email, phone,
                         address, mailbox, due_date, notes, ln=CFG_SITE_LANG):
    """
    Place a new loan from the item's page, step3: register the loan.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type ccid: string.
    @param ccid: borrower's CCID, used when a borrower record must be created.
    @type name: string.
    @type email: string.
    @type phone: string.
    @type address: string.
    @type mailbox: string.
    @type due_date: string.
    @type notes: string.
    @return: new loan.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    # Timestamp the free-text note before it is stored with the loan.
    if notes:
        notes_format = '[' + time.ctime() + '] ' + notes + '\n'
    else:
        notes_format = ''
    #loaned_on = datetime.date.today()
    borrower_id = db.get_borrower_id_by_email(email)
    # NOTE(review): borrower_info is fetched *before* the borrower may be
    # created in the final branch below, so in that branch it presumably
    # carries no data -- verify tmpl_register_new_loan tolerates that.
    borrower_info = db.get_borrower_data(borrower_id)
    if db.is_on_loan(barcode):
        # The copy is already out on loan: refuse, and re-display the full
        # item-details screen with a warning message.
        infos.append(_("Item with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is already on loan.") % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        copies = db.get_item_copies_details(recid)
        requests = db.get_item_requests(recid)
        loans = db.get_item_loans(recid)
        purchases = db.get_item_purchases(CFG_BIBCIRCULATION_ACQ_STATUS_NEW, recid)
        req_hist_overview = db.get_item_requests_historical_overview(recid)
        loans_hist_overview = db.get_item_loans_historical_overview(recid)
        purchases_hist_overview = db.get_item_purchases(CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED, recid)
        title = _("Item details")
        body = bc_templates.tmpl_get_item_details(
            recid=recid, copies=copies,
            requests=requests, loans=loans,
            purchases=purchases,
            req_hist_overview=req_hist_overview,
            loans_hist_overview=loans_hist_overview,
            purchases_hist_overview=purchases_hist_overview,
            infos=infos, ln=ln)
    elif borrower_id != 0:
        # Known borrower: create the loan, then bring the item and its
        # request queue up to date (order of these writes matters).
        db.new_loan(borrower_id, recid, barcode,
                    due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                    'normal', notes_format)
        tag_all_requests_as_done(barcode, borrower_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)
        title = _("New loan")
        body = bc_templates.tmpl_register_new_loan(borrower_info=borrower_info,
                                                   infos=infos,
                                                   recid=recid, ln=ln)
    else:
        # Unknown borrower: create the borrower record first, then the loan.
        db.new_borrower(ccid, name, email, phone, address, mailbox, '')
        borrower_id = db.get_borrower_id_by_email(email)
        db.new_loan(borrower_id, recid, barcode,
                    due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                    'normal', notes_format)
        tag_all_requests_as_done(barcode, borrower_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)
        title = _("New loan")
        body = bc_templates.tmpl_register_new_loan(borrower_info=borrower_info,
                                                   infos=infos,
                                                   recid=recid,
                                                   ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a> &gt; <a class="navtrail" ' \
                              'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                              'Circulation Management' \
                              '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    return page(title=title,
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def create_new_request_step1(req, borrower_id, p="", f="", search=None,
                             ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step1.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    p: search pattern.
    f: field
    search: search an item.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []

    if borrower_id is not None:
        borrower = db.get_borrower_details(borrower_id)
    else:
        message = _('Empty borrower ID.')
        return borrower_search(req, message, False, ln)

    if search and p == '':
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        result = ''
    elif search and f == 'barcode':
        p = p.strip('\'" \t')
        has_recid = db.get_id_bibrec(p)
        if has_recid is None:
            infos.append(_('The barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s does not exist on BibCirculation database.') % {'x_barcode': p, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
            result = ''
        else:
            result = has_recid
    elif search:
        result = perform_request_search(cc="Books", sc="1", p=p, f=f)
    else:
        result = ''

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    # A plain integer result means a barcode search resolved directly to a
    # single record: jump straight to the step2 holdings screen.
    # (isinstance replaces the old `type(x) is types.IntType/LongType`.)
    if isinstance(result, (int, long)):
        recid = result
        holdings_information = db.get_holdings_information(recid)
        user_info = db.get_borrower_details(borrower_id)
        body = bc_templates.tmpl_create_new_request_step2(user_info=user_info,
                            holdings_information=holdings_information,
                            recid=recid, ln=ln)
    else:
        body = bc_templates.tmpl_create_new_request_step1(borrower=borrower,
                                                          infos=infos,
                                                          result=result,
                                                          p=p,
                                                          f=f,
                                                          ln=ln)

    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def create_new_request_step2(req, recid, borrower_id, ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step2.

    recid: identify the record. It is also the primary key of
           the table bibrec.
    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # Gather everything the holdings selection screen needs.
    user_info = db.get_borrower_details(borrower_id)
    holdings_information = db.get_holdings_information(recid)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    body = bc_templates.tmpl_create_new_request_step2(
        user_info=user_info,
        holdings_information=holdings_information,
        recid=recid, ln=ln)

    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def create_new_request_step3(req, borrower_id, barcode, recid,
                             ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step3.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    barcode: identify the item. It is the primary key of the table
             crcITEM.
    recid: identify the record. It is also the primary key of
           the table bibrec.
    """
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    item_info = db.get_item_info(barcode)
    # Reference copies never leave the library, so no request can be placed.
    if item_info[6] == 'Reference':
        body = bc_templates.tmpl_book_not_for_loan(ln=ln)
    else:
        body = bc_templates.tmpl_create_new_request_step3(
            borrower_id=borrower_id,
            barcode=barcode,
            recid=recid,
            ln=ln)

    # CONSISTENCY FIX: language=ln was missing from this page() call while
    # every sibling handler passes it.  The jQuery UI stylesheet is needed
    # by the date picker in the request form.
    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                metaheaderadd="<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_SECURE_URL,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def create_new_request_step4(req, period_from, period_to, barcode,
                             borrower_id, recid, ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step4.

    period_from: begining of the period of interest.
    period_to: end of the period of interest.
    barcode: identify the item. It is the primary key of the table
             crcITEM.
    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    recid: identify the record. It is also the primary key of
           the table bibrec.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # Record the hold request, then recompute the item's request queue.
    db.new_hold_request(borrower_id, recid, barcode,
                        period_from, period_to,
                        CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
    update_requests_statuses(barcode)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    confirmation = bc_templates.tmpl_create_new_request_step4(ln=ln)

    return page(title=_("New request"),
                uid=id_user,
                req=req,
                body=confirmation, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def create_new_loan_step1(req, borrower_id, ln=CFG_SITE_LANG):
    """
    Create a new loan from the borrower's page, step1.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    borrower = db.get_borrower_details(borrower_id)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    # No messages to show on the initial form.
    body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower,
                                                   infos=[],
                                                   ln=ln)

    return page(title=_("New loan"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def create_new_loan_step2(req, borrower_id, barcode, notes, ln=CFG_SITE_LANG):
    """
    Create a new loan from the borrower's page, step2.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    barcode: identify the item. It is the primary key of the table
             crcITEM.
    notes: notes about the new loan.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    #borrower_info = db.get_borrower_data(borrower_id)
    # Resolve the barcode to a record and check for an existing loan.
    has_recid = db.get_id_bibrec(barcode)
    loan_id = db.is_item_on_loan(barcode)
    # Timestamp the free-text note before it is stored with the loan.
    if notes:
        notes_format = '[' + time.ctime() + '] ' + notes + '\n'
    else:
        notes_format = ''
    infos = []
    if has_recid is None:
        # Unknown barcode: re-display the step1 form with an error message.
        infos.append(_('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'} + ' ' + _('Please, try again.'))
        borrower = db.get_borrower_details(borrower_id)
        title = _("New loan")
        body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower,
                                                       infos=infos,
                                                       ln=ln)
    elif loan_id:
        # The copy is already out on loan: refuse and show step1 again.
        infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        borrower = db.get_borrower_details(borrower_id)
        title = _("New loan")
        body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower,
                                                       infos=infos,
                                                       ln=ln)
    else:
        # Register the loan with the default loan period, then bring the
        # item status and request queue up to date (write order matters).
        #loaned_on = datetime.date.today()
        due_date = renew_loan_for_X_days(barcode)
        db.new_loan(borrower_id, has_recid, barcode,
                    due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                    'normal', notes_format)
        tag_all_requests_as_done(barcode, borrower_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)
        result = db.get_all_loans(20)
        title = _("Current loans")
        infos.append(_('A new loan has been registered with success.'))
        body = bc_templates.tmpl_all_loans(result=result, infos=infos, ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    return page(title=title,
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def all_requests(req, request_id, ln=CFG_SITE_LANG):
    """
    Display all requests.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key
                       of the table crcLOANREQUEST.  When given, that request
                       is cancelled before the list is displayed.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if request_id:
        # Cancel the given request first so the listing below reflects it.
        db.update_loan_request_status(CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED,
                                      request_id)

    # Both branches of the old if/else issued this same query; do it once.
    result = db.get_all_requests()

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_all_requests(result=result, ln=ln)

    return page(title=_("List of hold requests"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def all_loans(req, msg=None, ln=CFG_SITE_LANG):
    """
    Display all current loans.

    msg: when 'ok', show a confirmation that a loan was just registered.
    @return: list with all loans (current loans).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []
    if msg == 'ok':
        infos.append(_('A new loan has been registered with success.'))

    result = db.get_all_loans(20)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                ' &gt; <a class="navtrail" '
                'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'
                'Circulation Management</a> '
                % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln))

    body = bc_templates.tmpl_all_loans(result=result, infos=infos, ln=ln)

    return page(title=_("Current loans"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def all_expired_loans(req, ln=CFG_SITE_LANG):
    """
    Display all overdue loans.

    @return: list with all expired loans (overdue loans).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    overdue_loans = db.get_all_expired_loans()

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                ' &gt; <a class="navtrail" '
                'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'
                'Circulation Management</a> '
                % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln))

    body = bc_templates.tmpl_all_expired_loans(result=overdue_loans,
                                               infos=[],
                                               ln=ln)

    return page(title=_('Overdue loans'),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def get_pending_requests(req, request_id, print_data, ln=CFG_SITE_LANG):
    """
    Return all loan requests that are pending. If request_id is not None,
    cancel the request and then return all loan requests that are pending.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key
                       of the table crcLOANREQUEST.
    @type print_data: string.
    @param print_data: print requests information.
    @return: list of pending requests (on shelf with hold).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if print_data == 'true':
        return print_pending_hold_requests_information(req, ln)

    if request_id:
        # Cancel the request, then refresh the item's request queue.
        db.update_loan_request_status(CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED,
                                      request_id)
        barcode = db.get_request_barcode(request_id)
        update_requests_statuses(barcode)

    # Both branches of the old elif/else issued this same query; do it once.
    result = db.get_loan_request_by_status(CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_get_pending_requests(result=result, ln=ln)

    return page(title=_("Items on shelf with holds"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_waiting_requests(req, request_id, print_data, ln=CFG_SITE_LANG):
    """
    Get all loans requests that are waiting.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key
                       of the table crcLOANREQUEST.
    @type print_data: string.
    @param print_data: print requests information.
    @return: list of waiting requests (on loan with hold).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if print_data == 'true':
        return print_pending_hold_requests_information(req, ln)

    if request_id:
        db.update_loan_request_status(CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED,
                                      request_id)

    waiting = db.get_loan_request_by_status(CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
    # Keep only requests whose item actually has a copy out on loan.
    result = tuple(request for request in waiting
                   if db.get_nb_copies_on_loan(request[1]))

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    body = bc_templates.tmpl_get_waiting_requests(result=result, ln=ln)

    return page(title=_("Items on loan with holds"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def get_expired_loans_with_waiting_requests(req, request_id, ln=CFG_SITE_LANG):
    """
    Display all overdue loans whose item has waiting hold requests.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key
                       of the table crcLOANREQUEST.  When given, that request
                       is cancelled before the list is displayed.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if request_id:
        db.update_loan_request_status(CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED,
                                      request_id)

    # Both branches of the old if/else issued this same query; do it once.
    result = db.get_expired_loans_with_waiting_requests()

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a> &gt; <a class="navtrail" ' \
                              'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                              'Circulation Management' \
                              '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    body = bc_templates.tmpl_get_expired_loans_with_waiting_requests(result=result,
                                                                     ln=ln)

    return page(title=_("Overdue loans with holds"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_loans_notes(req, loan_id, delete_key,
                    library_notes, back, ln=CFG_SITE_LANG):
    """
    Get loan's note(s), optionally deleting one or appending a new one.

    @type loan_id: integer.
    @param loan_id: identify a loan. It is the primery key of the table
                    crcLOAN.
    @param delete_key: timestamp key of the note to delete, if any.
    @param library_notes: text of a new note to append, if any.
    @param back: passed through to the template for the back link.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # Notes are stored as the repr() of a {timestamp: text} dict.
    # SECURITY NOTE(review): eval() on database content is dangerous if the
    # notes column can ever be written from untrusted input; consider
    # replacing with a safe parser (e.g. ast.literal_eval).
    if delete_key and loan_id:
        if looks_like_dictionary(db.get_loan_notes(loan_id)):
            loans_notes = eval(db.get_loan_notes(loan_id))
            if delete_key in loans_notes.keys():
                # Drop the selected note and persist the remaining dict.
                del loans_notes[delete_key]
                db.update_loan_notes(loan_id, loans_notes)
    elif library_notes:
        # Load the existing notes dict, tolerating empty/garbled content.
        if db.get_loan_notes(loan_id):
            if looks_like_dictionary(db.get_loan_notes(loan_id)):
                loans_notes = eval(db.get_loan_notes(loan_id))
            else:
                loans_notes = {}
        else:
            loans_notes = {}
        note_time = time.strftime("%Y-%m-%d %H:%M:%S")
        # Keyed by timestamp, so a second note in the same second is dropped.
        if note_time not in loans_notes.keys():
            loans_notes[note_time] = str(library_notes)
            db.update_loan_notes(loan_id, loans_notes)
    # Re-read so the page shows the state after any mutation above.
    loans_notes = db.get_loan_notes(loan_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a> &gt; <a class="navtrail" ' \
                              'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                              'Circulation Management' \
                              '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    referer = req.headers_in.get('referer')
    body = bc_templates.tmpl_get_loans_notes(loans_notes=loans_notes,
                                             loan_id=loan_id,
                                             referer=referer, back=back,
                                             ln=ln)
    return page(title=_("Loan notes"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_item_loans_notes(req, loan_id, add_notes, new_note, ln=CFG_SITE_LANG):
    """
    Get loan's notes.

    @param loan_id: identify a loan. It is the primery key of the table
                    crcLOAN.
    @param add_notes: display the textarea where a new note will be written.
    @param new_note: note that will be added to the other library notes.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if new_note:
        # Prefix the note with a timestamp before storing it.
        new_note = '[' + time.ctime() + '] ' + new_note + '\n'
        db.add_new_loan_note(new_note, loan_id)

    loans_notes = db.get_loan_notes(loan_id)

    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))

    body = bc_templates.tmpl_get_loans_notes(loans_notes=loans_notes,
                                             loan_id=loan_id,
                                             add_notes=add_notes,
                                             ln=ln)

    return page(title=_("Loan notes"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
###
### Items and their copies' related templates
###
def get_item_details(req, recid, ln=CFG_SITE_LANG):
    """
    Display the details of an item: its copies, hold requests, loans,
    purchases and the historical overview of each.

    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @return: item details page.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # BUGFIX: a leftover debugging statement (id_user = 1) used to clobber
    # the real user id here, so the page was always rendered as user 1.
    infos = []
    if recid is None:
        infos.append(_("Record id not valid"))
    copies = db.get_item_copies_details(recid)
    requests = db.get_item_requests(recid)
    loans = db.get_item_loans(recid)
    purchases = db.get_item_purchases(CFG_BIBCIRCULATION_ACQ_STATUS_NEW, recid)
    req_hist_overview = db.get_item_requests_historical_overview(recid)
    loans_hist_overview = db.get_item_loans_historical_overview(recid)
    purchases_hist_overview = db.get_item_purchases(
        CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED, recid)
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_get_item_details(
        recid=recid,
        copies=copies,
        requests=requests,
        loans=loans,
        purchases=purchases,
        req_hist_overview=req_hist_overview,
        loans_hist_overview=loans_hist_overview,
        purchases_hist_overview=purchases_hist_overview,
        infos=infos,
        ln=ln)
    return page(title=_("Item details"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_item_requests_details(req, recid, request_id, ln=CFG_SITE_LANG):
    """
    Display all hold requests for a specific item, optionally cancelling
    one first.

    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type request_id: integer.
    @param request_id: identify the hold request to cancel. It is also the
                       primary key of the table crcLOANREQUEST.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if request_id:
        db.cancel_request(request_id)
        update_request_data(request_id)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_get_item_requests_details(
        result=db.get_item_requests(recid), ln=ln)
    title = _("Hold requests") + " - %s" % (book_title_from_MARC(recid))
    return page(title=title,
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def get_item_loans_details(req, recid, barcode, loan_id, force,
                           ln=CFG_SITE_LANG):
    """
    Show all the details about all current loans related with a record,
    optionally renewing one loan first.

    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    @type loan_id: integer.
    @param loan_id: identify a loan. It is the primary key of the table
                    crcLOAN.
    @type force: string.
    @param force: force the renew of a loan, when usually this is not
                  possible ('true' to force).
    @return: item loans details.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    if loan_id and barcode and force == 'true':
        # Forced renewal: renew even though other readers are queueing.
        new_due_date = renew_loan_for_X_days(barcode)
        #db.update_due_date(loan_id, new_due_date)
        db.renew_loan(loan_id, new_due_date)
        update_status_if_expired(loan_id)
        infos.append(_("Loan renewed with success."))
    elif barcode:
        # Normal renewal: only allowed when nobody is waiting for the item.
        recid = db.get_id_bibrec(barcode)
        item_description = db.get_item_description(barcode)
        queue = db.get_queue_request(recid, item_description)
        new_due_date = renew_loan_for_X_days(barcode)
        # Yes/No links used when the renewal needs explicit confirmation.
        force_renew_link = create_html_link(CFG_SITE_SECURE_URL +
                        '/admin2/bibcirculation/get_item_loans_details',
                        {'barcode': barcode, 'loan_id': loan_id, 'force': 'true',
                         'recid': recid, 'ln': ln}, (_("Yes")))
        no_renew_link = create_html_link(CFG_SITE_SECURE_URL +
                        '/admin2/bibcirculation/get_item_loans_details',
                        {'recid': recid, 'ln': ln},
                        (_("No")))
        if len(queue) != 0:
            # Someone is waiting: ask for confirmation instead of renewing.
            title = book_title_from_MARC(recid)
            message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
            message += '\n\n'
            message += _("Do you want renew this loan anyway?")
            message += '\n\n'
            message += "[%s] [%s]" % (force_renew_link, no_renew_link)
            infos.append(message)
        else:
            db.renew_loan(loan_id, new_due_date)
            #db.update_due_date(loan_id, new_due_date)
            update_status_if_expired(loan_id)
            infos.append(_("Loan renewed with success."))
    result = db.get_item_loans(recid)
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_get_item_loans_details(result=result,
                                                    recid=recid,
                                                    infos=infos,
                                                    ln=ln)
    return page(title=_("Loans details") + \
                      " - %s" % (book_title_from_MARC(int(recid))),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_item_req_historical_overview(req, recid, ln=CFG_SITE_LANG):
    """
    Show the historical overview of the hold requests made on an item.

    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_get_item_req_historical_overview(
        req_hist_overview=db.get_item_requests_historical_overview(recid),
        ln=ln)
    return page(title=_("Requests") + " - " + _("historical overview"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def get_item_loans_historical_overview(req, recid, ln=CFG_SITE_LANG):
    """
    Show the historical overview of the loans made on an item.

    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
                  the table bibrec.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_get_item_loans_historical_overview(
        loans_hist_overview=db.get_item_loans_historical_overview(recid),
        ln=ln)
    return page(title=_("Loans") + " - " + _("historical overview"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_copy_step1(req, ln=CFG_SITE_LANG):
    """Render step 1 of the 'add new copy' wizard (the search form)."""
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    return page(title=_("Add new copy") + " - I",
                uid=uid,
                req=req,
                body=bc_templates.tmpl_add_new_copy_step1(ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_copy_step2(req, p, f, ln=CFG_SITE_LANG):
    """
    Render step 2 of the 'add new copy' wizard: the records in the 'Books'
    collection matching the search from step 1.

    @type p: string
    @param p: search pattern
    @type f: string
    @param f: search field
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    hits = perform_request_search(cc="Books", sc="1", p=p, f=f)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_add_new_copy_step2(result=hits, ln=ln)
    return page(title=_("Add new copy") + " - II",
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_copy_step3(req, recid, barcode, ln=CFG_SITE_LANG):
    """
    Render step 3 of the 'add new copy' wizard: the form with the new
    copy's details.

    @type recid: integer.
    @param recid: identify the record the new copy will belong to.
    @param barcode: barcode of an existing copy passed by the caller, or
                    None.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    result = db.get_item_copies_details(recid)
    libraries = db.get_internal_libraries()
    navtrail_previous_links = '<a class="navtrail"' \
                ' href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    if barcode is not None:
        # Keep the given barcode only when some copy actually uses it;
        # otherwise pass None so the template gets no "original copy"
        # to refer to (relies on db.barcode_in_use semantics).
        if not db.barcode_in_use(barcode):
            barcode = None
    tmp_barcode = generate_tmp_barcode()
    body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                result=result,
                                                libraries=libraries,
                                                original_copy_barcode=barcode,
                                                tmp_barcode=tmp_barcode,
                                                infos=infos,
                                                ln=ln)
    return page(title=_("Add new copy") + " - III",
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def add_new_copy_step4(req, barcode, library, location, collection, description,
                       loan_period, status, expected_arrival_date, recid,
                       ln=CFG_SITE_LANG):
    """
    Add a new copy, step 4: validate the barcode/status entered in step 3
    and, on success, show the confirmation form.

    @param barcode: barcode of the new copy; must be non-empty, unused,
                    and only temporary ('tmp...') for non-circulating
                    statuses.
    @type recid: integer.
    @param recid: identify the record the new copy will belong to.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail_previous_links = '<a class="navtrail"' \
                ' href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    infos = []
    result = db.get_item_copies_details(recid)
    libraries = db.get_internal_libraries()
    if db.barcode_in_use(barcode):
        # BUGFIX: interpolate after translation so the catalog is queried
        # with the '%s' placeholder message, not the expanded string.
        infos.append(_("The given barcode <strong>%s</strong> is already in use.") % barcode)
        title = _("Add new copy") + " - III"
        body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    original_copy_barcode=None,
                                                    tmp_barcode=None,
                                                    infos=infos,
                                                    ln=ln)
    elif not barcode:
        infos.append(_("The given barcode is empty."))
        title = _("Add new copy") + " - III"
        body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    original_copy_barcode=None,
                                                    tmp_barcode=None,
                                                    infos=infos,
                                                    ln=ln)
    elif barcode[:3] == 'tmp' \
            and status in [CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
                           CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN,
                           CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS]:
        # Typo fixed: 'tamporary' -> 'temporary'.
        infos.append(_("The status selected does not accept temporary barcodes."))
        title = _("Add new copy") + " - III"
        tmp_barcode = generate_tmp_barcode()
        body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    original_copy_barcode=None,
                                                    tmp_barcode=tmp_barcode,
                                                    infos=infos,
                                                    ln=ln)
    else:
        # All checks passed: show the confirmation page (step 4 proper).
        library_name = db.get_library_name(library)
        tup_infos = (barcode, library, library_name, location, collection,
                     description, loan_period, status, expected_arrival_date,
                     recid)
        title = _("Add new copy") + " - IV"
        body = bc_templates.tmpl_add_new_copy_step4(tup_infos=tup_infos, ln=ln)
    return page(title=title,
                uid=id_user,
                req=req,
                body=body,
                metaheaderadd='<link rel="stylesheet" href="%s/img/jquery-ui.css" '\
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def add_new_copy_step5(req, barcode, library, location, collection, description,
                       loan_period, status, expected_arrival_date, recid,
                       ln=CFG_SITE_LANG):
    """
    Add a new copy, final step: insert the copy in the database (unless
    the barcode got taken in the meantime) and show the result.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    if db.barcode_in_use(barcode):
        infos.append(_("The given barcode <strong>%s</strong> is already in use.") % barcode)
    else:
        # An empty description is stored as '-'.
        db.add_new_copy(barcode, recid, library, collection, location,
                        description.strip() or '-',
                        loan_period, status, expected_arrival_date)
        update_requests_statuses(barcode)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_add_new_copy_step5(infos=infos, recid=recid, ln=ln)
    return page(title=_("Add new copy") + " - V",
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def delete_copy_step1(req, barcode, ln):
    """
    Ask for confirmation before deleting the copy identified by *barcode*;
    fall back to the item-search form when the barcode is unknown.
    """
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    # Tolerate quotes and whitespace around a scanned or pasted barcode.
    barcode = barcode.strip('\'" \t')
    recid = db.get_id_bibrec(barcode)
    if recid:
        infos.append(_("Do you really want to delete this copy of the book?"))
        title = _("Delete copy")
        body = bc_templates.tmpl_delete_copy_step1(
            barcode_to_delete=barcode,
            recid=recid,
            result=db.get_item_copies_details(recid),
            infos=infos,
            ln=ln)
    else:
        infos.append(_("The barcode <strong>%s</strong> was not found") % (barcode))
        title = _("Item search")
        body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
    return page(title=title,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def delete_copy_step2(req, barcode, ln):
    """
    Delete the copy identified by *barcode* and show the updated item
    details, or the item-search form when the barcode is unknown.
    """
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    barcode = barcode.strip('\'" \t')
    recid = db.get_id_bibrec(barcode)
    if recid:
        # db.delete_copy reports how many rows were removed.
        if db.delete_copy(barcode) == 1:
            infos.append(_("The copy with barcode <strong>%s</strong> has been deleted.") % (barcode))
        else:
            infos.append(_('It was NOT possible to delete the copy with barcode <strong>%s</strong>') % (barcode))
        title = _("Item details")
        body = bc_templates.tmpl_get_item_details(
            recid=recid,
            copies=db.get_item_copies_details(recid),
            requests=db.get_item_requests(recid),
            loans=db.get_item_loans(recid),
            purchases=db.get_item_purchases(
                CFG_BIBCIRCULATION_ACQ_STATUS_NEW, recid),
            req_hist_overview=db.get_item_requests_historical_overview(recid),
            loans_hist_overview=db.get_item_loans_historical_overview(recid),
            purchases_hist_overview=db.get_item_purchases(
                CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED, recid),
            infos=infos, ln=ln)
    else:
        infos.append(_("The barcode <strong>%s</strong> was not found") % (barcode))
        title = _("Item search")
        body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    return page(title=title,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_item_info_step1(req, ln=CFG_SITE_LANG):
    """Render step 1 of the item-information update wizard."""
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    return page(title=_("Update item information"),
                uid=uid,
                req=req,
                body=bc_templates.tmpl_update_item_info_step1(ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_item_info_step2(req, p, f, ln=CFG_SITE_LANG):
    """
    Update the item's information, step 2: search for the record whose
    copies will be edited.

    @type p: string.
    @param p: search pattern.
    @type f: string.
    @param f: search field.
    """
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    result = perform_request_search(cc="Books", sc="1", p=p, f=f)
    body = bc_templates.tmpl_update_item_info_step2(result=result, ln=ln)
    # BUGFIX: the title was the only one in this wizard not passed
    # through _(), so it was never translated.
    return page(title=_("Update item information"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_item_info_step3(req, recid, ln=CFG_SITE_LANG):
    """
    Update the item's information, step 3: list the record's copies so one
    can be picked for editing.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_update_item_info_step3(
        recid=recid,
        result=db.get_item_copies_details(recid),
        ln=ln)
    return page(title=_("Update item information"),
                uid=uid,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_item_info_step4(req, barcode, ln=CFG_SITE_LANG):
    """
    Update the item's information, step 4: show the edit form pre-filled
    with the data of the copy identified by *barcode*.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    recid = db.get_id_bibrec(barcode)
    result = db.get_item_info(barcode)
    libraries = db.get_internal_libraries()
    libraries += db.get_hidden_libraries()
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    if recid is None:
        # BUGFIX: interpolate after translation so the '%s' placeholder
        # version of the message is looked up in the catalog; also dropped
        # a redundant second gettext_set_language call.
        infos = []
        infos.append(_("Barcode <strong>%s</strong> not found") % barcode)
        return item_search(req, infos, ln)
    body = bc_templates.tmpl_update_item_info_step4(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    ln=ln)
    return page(title=_("Update item information"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_item_info_step5(req, barcode, old_barcode, library, location,
                           collection, description, loan_period, status,
                           expected_arrival_date, recid, ln=CFG_SITE_LANG):
    """
    Update the item's information, step 5: show the confirmation page for
    the values entered in step 4.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # Same layout as expected by step 6: the library name is resolved here.
    tup_infos = (barcode, old_barcode, library, db.get_library_name(library),
                 location, collection, description, loan_period, status,
                 expected_arrival_date, recid)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_update_item_info_step5(tup_infos=tup_infos, ln=ln)
    return page(title=_("Update item information"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_item_info_step6(req, tup_infos, ln=CFG_SITE_LANG):
    """
    Update the item's information, final step: apply the changes confirmed
    in step 5.

    @type tup_infos: tuple.
    @param tup_infos: (barcode, old_barcode, library_id, location,
                      collection, description, loan_period, status,
                      expected_arrival_date, recid), as built by step 5.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    # tuple containing information for the update process.
    (barcode, old_barcode, library_id, location, collection,
     description, loan_period, status, expected_arrival_date, recid) = tup_infos
    is_on_loan = db.is_on_loan(old_barcode)
    #is_requested = db.is_requested(old_barcode)
    # if item on loan and new status is CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
    # item has to be returned.
    if is_on_loan and status == CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF:
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, old_barcode)
        db.return_loan(old_barcode)
    # A copy that is not on loan cannot be switched to 'on loan' by hand:
    # keep its current status and warn the librarian instead.
    if not is_on_loan and status == CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN:
        status = db.get_copy_details(barcode)[7]
        infos.append(_("Item <strong>[%s]</strong> updated, but the <strong>status was not modified</strong>.") % (old_barcode))
    # update item information.
    db.update_item_info(old_barcode, library_id, collection, location, description.strip(),
                        loan_period, status, expected_arrival_date)
    update_requests_statuses(old_barcode)
    # NOTE(review): the two fragments below concatenate without a space
    # ('...navtrail"href=...') -- looks like a typo in the original HTML;
    # confirm before fixing.
    navtrail_previous_links = '<a class="navtrail"' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    if barcode != old_barcode:
        # The barcode itself changed: try to rename it, reporting why it
        # could not be renamed when that fails.
        if db.barcode_in_use(barcode):
            infos.append(_("Item <strong>[%s]</strong> updated, but the <strong>barcode was not modified</strong> because it is already in use.") % (old_barcode))
        else:
            if db.update_barcode(old_barcode, barcode):
                infos.append(_("Item <strong>[%s]</strong> updated to <strong>[%s]</strong> with success.") % (old_barcode, barcode))
            else:
                infos.append(_("Item <strong>[%s]</strong> updated, but the <strong>barcode was not modified</strong> because it was not found (!?).") % (old_barcode))
        copies = db.get_item_copies_details(recid)
        requests = db.get_item_requests(recid)
        loans = db.get_item_loans(recid)
        purchases = db.get_item_purchases(CFG_BIBCIRCULATION_ACQ_STATUS_NEW, recid)
        req_hist_overview = db.get_item_requests_historical_overview(recid)
        loans_hist_overview = db.get_item_loans_historical_overview(recid)
        purchases_hist_overview = db.get_item_purchases(CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED, recid)
        body = bc_templates.tmpl_get_item_details(recid=recid,
                                                  copies=copies,
                                                  requests=requests,
                                                  loans=loans,
                                                  purchases=purchases,
                                                  req_hist_overview=req_hist_overview,
                                                  loans_hist_overview=loans_hist_overview,
                                                  purchases_hist_overview=purchases_hist_overview,
                                                  infos=infos,
                                                  ln=ln)
        return page(title=_("Update item information"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        # Barcode unchanged: jump straight back to the record editor.
        return redirect_to_url(req, CFG_SITE_SECURE_URL +
                               "/record/edit/#state=edit&recid=" + str(recid))
def item_search(req, infos=None, ln=CFG_SITE_LANG):
    """
    Display a form where it is possible to search for an item.

    @param infos: informational messages to display above the form.
                  BUGFIX: this used to default to the mutable ``[]``,
                  which Python shares between calls.
    """
    if infos is None:
        infos = []
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a> &gt; <a class="navtrail" ' \
                'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                'Circulation Management' \
                '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    _ = gettext_set_language(ln)
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
    return page(title=_("Item search"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def item_search_result(req, p, f, ln=CFG_SITE_LANG):
    """
    Search for an item and show the possible matches. Barcodes and record
    ids are resolved directly; any other field goes through
    'perform_request_search' (search_engine.py), restricted to the 'Books'
    collection.

    @type p: string
    @param p: search pattern
    @type f: string
    @param f: search field
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    if p == '':
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        return item_search(req, infos, ln)
    if f == 'barcode':
        # Tolerate quotes/whitespace around a scanned barcode.
        p = p.strip('\'" \t')
        recid = db.get_id_bibrec(p)
        if recid is not None:
            return get_item_details(req, recid, ln=ln)
        infos.append(_('The barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s does not exist on BibCirculation database.') % {'x_barcode': p, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
    elif f == 'recid':
        recid = p.strip('\'" \t')
        if record_exists(recid):
            return get_item_details(req, recid, ln=ln)
        infos.append(_("Requested record does not seem to exist."))
        body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
    else:
        hits = perform_request_search(cc="Books", sc="1", p=p, f=f)
        body = bc_templates.tmpl_item_search_result(result=hits, ln=ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a> &gt; <a class="navtrail" '
                'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'
                'Circulation Management</a> '
                % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln))
    return page(title=_("Item search result"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
###
### "Borrower" related templates
###
def get_borrower_details(req, borrower_id, update, ln=CFG_SITE_LANG):
    """
    Display the details of a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @param update: when true (and on a CERN installation), refresh the
                   borrower's data from LDAP before displaying it.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if update and CFG_CERN_SITE:
        update_user_info_from_ldap(borrower_id)
    borrower = db.get_borrower_details(borrower_id)
    if borrower == None:
        # Unknown borrower: fall back to the search form with a warning.
        info = _('Borrower not found.') + ' ' + _('Please, try again.')
        return borrower_search(req, info, False, ln)
    else:
        # Gather the borrower's current activity plus the historical
        # overviews shown in the details page.
        requests = db.get_borrower_request_details(borrower_id)
        loans = db.get_borrower_loan_details(borrower_id)
        notes = db.get_borrower_notes(borrower_id)
        ill = db.get_ill_requests_details(borrower_id)
        proposals = db.get_proposal_requests_details(borrower_id)
        req_hist = db.bor_requests_historical_overview(borrower_id)
        loans_hist = db.bor_loans_historical_overview(borrower_id)
        ill_hist = db.bor_ill_historical_overview(borrower_id)
        proposal_hist = db.bor_proposal_historical_overview(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_borrower_details(borrower=borrower,
                                              requests=requests,
                                              loans=loans,
                                              notes=notes,
                                              ill=ill,
                                              proposals=proposals,
                                              req_hist=req_hist,
                                              loans_hist=loans_hist,
                                              ill_hist=ill_hist,
                                              proposal_hist=proposal_hist,
                                              ln=ln)
    return page(title=_("Borrower details"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def add_new_borrower_step1(req, ln=CFG_SITE_LANG):
    """Render step 1 of the 'add new borrower' wizard (the input form)."""
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    return page(title=_("Add new borrower") + " - I",
                uid=uid,
                req=req,
                body=bc_templates.tmpl_add_new_borrower_step1(ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_borrower_step2(req, name, email, phone, address, mailbox,
                           notes, ln=CFG_SITE_LANG):
    """
    Add new borrower, step 2: validate the submitted data and, on success,
    create the borrower and redirect to its details page.

    @type name: string.
    @type email: string.
    @type phone: string.
    @type address: string.
    @type mailbox: string.
    @type notes: string.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    # Validation: name and email are mandatory, and the email must not
    # already belong to another borrower.
    if name == '':
        infos.append(_("Please, insert a name"))
    if email == '':
        infos.append(_("Please, insert a valid email address"))
    else:
        borrower_id = db.get_borrower_id_by_email(email)
        if borrower_id is not None:
            infos.append(_("There is already a borrower using the following email:")
                         + " <strong>%s</strong>" % (email))
    tup_infos = (name, email, phone, address, mailbox, notes)
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    if len(infos) > 0:
        # Validation failed: redisplay step 1 with the error messages.
        body = bc_templates.tmpl_add_new_borrower_step1(tup_infos=tup_infos,
                                                        infos=infos, ln=ln)
        title = _("Add new borrower") + " - I"
    else:
        if notes != '':
            # Notes are stored as a {timestamp: text} dictionary.
            borrower_notes = {}
            note_time = time.strftime("%Y-%m-%d %H:%M:%S")
            borrower_notes[note_time] = notes
        else:
            borrower_notes = ''
        borrower_id = db.new_borrower(None, name, email, phone,
                                      address, mailbox, borrower_notes)
        return redirect_to_url(req,
            '%s/admin2/bibcirculation/get_borrower_details?ln=%s&borrower_id=%s' \
            % (CFG_SITE_SECURE_URL, ln, borrower_id))
        #body = bc_templates.tmpl_add_new_borrower_step2(tup_infos=tup_infos,
        #                                                infos=infos, ln=ln)
        #title = _("Add new borrower") + " - II"
    return page(title=title,
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def add_new_borrower_step3(req, tup_infos, ln=CFG_SITE_LANG):
    """
    Add new borrower, step 3: create the borrower record.

    @type tup_infos: tuple.
    @param tup_infos: (name, email, phone, address, mailbox, notes), as
                      assembled in step 2.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    name, email, phone, address, mailbox, notes = tup_infos
    if notes != '':
        # Notes are kept as a {timestamp: text} mapping.
        borrower_notes = {time.strftime("%Y-%m-%d %H:%M:%S"): str(notes)}
    else:
        borrower_notes = ''
    db.new_borrower(None, name, email, phone,
                    address, mailbox, str(borrower_notes))
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_add_new_borrower_step3(ln=ln)
    return page(title=_("Add new borrower") + " - III",
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_borrower_info_step1(req, borrower_id, ln=CFG_SITE_LANG):
    """
    Show the form to update a borrower's information.

    @param borrower_id: identify the borrower. It is also the primary key
                        of the table crcBORROWER.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    details = db.get_borrower_details(borrower_id)
    # Forward fields 0 and 2..6 of the details row; index 1 is skipped.
    tup_infos = (details[0], details[2], details[3],
                 details[4], details[5], details[6])
    navtrail = ('<a class="navtrail" href="%s/help/admin">'
                'Admin Area</a>' % (CFG_SITE_SECURE_URL,))
    body = bc_templates.tmpl_update_borrower_info_step1(tup_infos=tup_infos,
                                                        ln=ln)
    return page(title=_("Update borrower information"),
                uid=uid,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_borrower_info_step2(req, borrower_id, name, email, phone, address,
                               mailbox, ln=CFG_SITE_LANG):
    """
    Update the borrower's information with the values submitted in step 1.

    @param borrower_id: identify the borrower. It is also the primary key
                        of the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    # Validation: name and email are mandatory; the email must not belong
    # to a different borrower.
    if name == '':
        infos.append(_("Please, insert a name"))
    if email == '':
        infos.append(_("Please, insert a valid email address"))
    else:
        borrower_email_id = db.get_borrower_id_by_email(email)
        # NOTE(review): borrower_id may arrive as a string from the form
        # while the id returned by the db layer could be an int -- confirm
        # this inequality never misfires on equal ids of different types.
        if borrower_email_id is not None and borrower_id != borrower_email_id:
            infos.append(_("There is already a borrower using the following email:")
                         + " <strong>%s</strong>" % (email))
    tup_infos = (borrower_id, name, email, phone, address, mailbox)
    navtrail_previous_links = '<a class="navtrail" ' \
                'href="%s/help/admin">Admin Area' \
                '</a>' % (CFG_SITE_SECURE_URL,)
    if len(infos) > 0:
        # Validation failed: show the form again with the error messages.
        body = bc_templates.tmpl_update_borrower_info_step1(tup_infos=tup_infos,
                                                            infos=infos, ln=ln)
    else:
        db.update_borrower_info(borrower_id, name, email,
                                phone, address, mailbox)
        return redirect_to_url(req,
            '%s/admin2/bibcirculation/get_borrower_details?ln=%s&borrower_id=%s' \
            % (CFG_SITE_SECURE_URL, ln, borrower_id))
    return page(title=_("Update borrower information"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_borrower_requests_details(req, borrower_id, request_id,
                                  ln=CFG_SITE_LANG):
    """
    Display the hold requests of a borrower, optionally cancelling one first.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @type request_id: integer.
    @param request_id: identify the hold request to be cancelled
    @return: borrower requests details.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # A request id means the librarian asked for this request to be
    # cancelled before the list is (re)displayed.
    if request_id:
        db.cancel_request(request_id)
        update_request_data(request_id)

    requests = db.get_borrower_request_details(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    borrower_name = db.get_borrower_name(borrower_id)
    page_title = _("Hold requests details") + " - %s" % (borrower_name)
    body = bc_templates.tmpl_borrower_request_details(result=requests,
                                                      borrower_id=borrower_id,
                                                      ln=ln)
    return page(title=page_title,
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_borrower_loans_details(req, recid, barcode, borrower_id,
                               renewal, force, loan_id, ln=CFG_SITE_LANG):
    """
    Show borrower's loans details, optionally renewing one or all loans.
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of
        the table bibrec.
    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table
        crcITEM.
    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
        the table crcBORROWER.
    @type renewal: string.
    @param renewal: renew all loans ('true' triggers the bulk renewal).
    @type force: string.
    @param force: force the renew of a loan, when usually this is not possible.
    @type loan_id: integer.
    @param loan_id: identify a loan. It is the primary key of the table
        crcLOAN.
    @return: borrower loans details.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    # Links embedded in the "someone is waiting for this book" message:
    # confirm the forced renewal, or go back to the plain details page.
    force_renew_link = create_html_link(CFG_SITE_SECURE_URL +
                        '/admin2/bibcirculation/get_borrower_loans_details',
                        {'barcode': barcode, 'borrower_id': borrower_id,
                        'loan_id': loan_id, 'force': 'true', 'ln': ln},
                        (_("Yes")))
    no_renew_link = create_html_link(CFG_SITE_SECURE_URL +
                        '/admin2/bibcirculation/get_borrower_loans_details',
                        {'borrower_id': borrower_id, 'ln': ln},
                        (_("No")))
    # Case 1: renew one specific loan; ask for confirmation when other
    # users are queued for the same item.
    if barcode and loan_id and recid:
        item_description = db.get_item_description(barcode)
        queue = db.get_queue_request(recid, item_description)
        new_due_date = renew_loan_for_X_days(barcode)
        if len(queue) != 0:
            title = book_title_from_MARC(recid)
            message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
            message += '\n\n'
            message += _("Do you want renew this loan anyway?")
            message += '\n\n'
            message += "[%s] [%s]" % (force_renew_link, no_renew_link)
            infos.append(message)
        else:
            #db.update_due_date(loan_id, new_due_date)
            db.renew_loan(loan_id, new_due_date)
            #update_status_if_expired(loan_id)
            infos.append(_("Loan renewed with success."))
    # Case 2: renewal explicitly forced (the librarian confirmed above),
    # so renew regardless of the request queue.
    elif loan_id and barcode and force == 'true':
        new_due_date = renew_loan_for_X_days(barcode)
        db.renew_loan(loan_id, new_due_date)
        update_status_if_expired(loan_id)
        infos.append(_("Loan renewed with success."))
    # Case 3: renew every loan of the borrower, asking confirmation for
    # each item that has a request queue.
    elif borrower_id and renewal=='true':
        list_of_loans = db.get_recid_borrower_loans(borrower_id)
        for (loan_id, recid, barcode) in list_of_loans:
            item_description = db.get_item_description(barcode)
            queue = db.get_queue_request(recid, item_description)
            new_due_date = renew_loan_for_X_days(barcode)
            force_renewall_link = create_html_link(CFG_SITE_SECURE_URL +
                        '/admin2/bibcirculation/get_borrower_loans_details',
                        {'barcode': barcode, 'borrower_id': borrower_id,
                        'loan_id': loan_id, 'force': 'true', 'ln': ln},
                        (_("Yes")))
            if len(queue) != 0:
                title = book_title_from_MARC(recid)
                message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
                message += '\n\n'
                message += _("Do you want renew this loan anyway?")
                message += '\n\n'
                message += "[%s] [%s]" % (force_renewall_link, no_renew_link)
                infos.append(message)
            else:
                db.renew_loan(loan_id, new_due_date)
                update_status_if_expired(loan_id)
        # Only report global success when no per-item confirmation is pending.
        if infos == []:
            infos.append(_("All loans renewed with success."))
    borrower_loans = db.get_borrower_loan_details(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_borrower_loans_details(
                                            borrower_loans=borrower_loans,
                                            borrower_id=borrower_id,
                                            infos=infos, ln=ln)
    return page(title=_("Loans details") + \
                " - %s" %(db.get_borrower_name(borrower_id)),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def bor_loans_historical_overview(req, borrower_id, ln=CFG_SITE_LANG):
    """
    Display the loans historical overview of a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @return: borrower loans - historical overview.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    overview = db.bor_loans_historical_overview(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_bor_loans_historical_overview(
        loans_hist_overview=overview, ln=ln)
    return page(title=_("Loans") + " - " + _("historical overview"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def bor_requests_historical_overview(req, borrower_id, ln=CFG_SITE_LANG):
    """
    Display the requests historical overview of a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @return: borrower requests - historical overview.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    overview = db.bor_requests_historical_overview(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_bor_requests_historical_overview(
        req_hist_overview=overview, ln=ln)
    return page(title=_("Requests") + " - " + _("historical overview"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_borrower_ill_details(req, borrower_id, request_type='', ln=CFG_SITE_LANG):
    """
    Display ILL details of a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @type request_type: string.
    @param request_type: 'proposal-book' selects the book-proposal requests;
                         any other value selects the regular ILL requests.
    @return: borrower ILL details.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if request_type == 'proposal-book':
        result = db.get_proposal_requests_details(borrower_id)
    else:
        result = db.get_ill_requests_details(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    name = db.get_borrower_name(borrower_id)
    # " - " (with surrounding spaces) keeps the title format consistent
    # with the other borrower pages; the original produced "ILL details- X".
    title = _("ILL details") + " - %s" % (name)
    body = bc_templates.tmpl_borrower_ill_details(result=result,
                                                  borrower_id=borrower_id,
                                                  ln=ln)
    return page(title=title,
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def bor_ill_historical_overview(req, borrower_id, request_type='', ln=CFG_SITE_LANG):
    """
    Display the ILL historical overview of a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @type request_type: string.
    @param request_type: 'proposal-book' selects the book-proposal history;
                         any other value selects the regular ILL history.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if request_type == 'proposal-book':
        result = db.bor_proposal_historical_overview(borrower_id)
    else:
        result = db.bor_ill_historical_overview(borrower_id)

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    borrower_name = db.get_borrower_name(borrower_id)
    body = bc_templates.tmpl_borrower_ill_details(result=result,
                                                  borrower_id=borrower_id,
                                                  ln=ln)
    return page(title=_("ILL historical overview") + " - %s" % (borrower_name),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def borrower_notification(req, borrower_id, template, message, load_msg_template,
                          subject, send_message, from_address, ln=CFG_SITE_LANG):
    """
    Send an email to a borrower or simply load and display an editable email
    template.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @type template: string.
    @param template: name of the notification template to load; when
                     load_msg_template is 'False' it is instead the email
                     body itself.
    @type message: string.
    @param message: message to be sent if the flag send_message is set.
    @type subject: string.
    @param subject: subject of the message.
    @type from_address: string.
    @param from_address: From address in the message sent.
    @return: display the email template or send an email to a borrower.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    email = db.get_borrower_email(borrower_id)

    if load_msg_template == 'False' and template is not None:
        # 'template' already holds the email body itself; show it as-is.
        body = bc_templates.tmpl_borrower_notification(email=email,
                                                       subject=subject,
                                                       email_body=template,
                                                       borrower_id=borrower_id,
                                                       from_address=from_address,
                                                       ln=ln)
    elif send_message:
        send_email(fromaddr=from_address,
                   toaddr=email,
                   subject=subject,
                   content=message,
                   header='',
                   footer='',
                   attempt_times=1,
                   attempt_sleeptime=10)
        body = bc_templates.tmpl_send_notification(ln=ln)
    else:
        # Load the named template and present it for editing.
        email_body = load_template(template)
        body = bc_templates.tmpl_borrower_notification(email=email,
                                                       subject=subject,
                                                       email_body=email_body,
                                                       borrower_id=borrower_id,
                                                       from_address=from_address,
                                                       ln=ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    return page(title="User Notification",
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_borrower_notes(req, borrower_id, delete_key, library_notes,
                       ln=CFG_SITE_LANG):
    """
    Display, append to, or delete from the notes attached to a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of
                        the table crcBORROWER.
    @type delete_key: string.
    @param delete_key: timestamp key of the note to delete, if any.
    @type library_notes: string.
    @param library_notes: new note to be appended, if any.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # NOTE(review): notes are stored as the repr() of a dict and decoded
    # with eval(), guarded only by looks_like_dictionary().  This is unsafe
    # if the column can ever be written by untrusted parties; kept here for
    # compatibility with the existing storage format.
    if delete_key and borrower_id:
        stored_notes = db.get_borrower_notes(borrower_id)
        if looks_like_dictionary(stored_notes):
            borrower_notes = eval(stored_notes)
            if delete_key in borrower_notes:
                del borrower_notes[delete_key]
                db.update_borrower_notes(borrower_id, borrower_notes)
    elif library_notes:
        stored_notes = db.get_borrower_notes(borrower_id)
        if stored_notes and looks_like_dictionary(stored_notes):
            borrower_notes = eval(stored_notes)
        else:
            borrower_notes = {}
        # Notes are keyed by creation timestamp; skip the write when a note
        # with the same timestamp already exists.
        note_time = time.strftime("%Y-%m-%d %H:%M:%S")
        if note_time not in borrower_notes:
            borrower_notes[note_time] = str(library_notes)
            db.update_borrower_notes(borrower_id, borrower_notes)
    borrower_notes = db.get_borrower_notes(borrower_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    body = bc_templates.tmpl_borrower_notes(borrower_notes=borrower_notes,
                                            borrower_id=borrower_id,
                                            ln=ln)
    return page(title=_("Borrower notes"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def borrower_search(req, empty_barcode, redirect_to_new_request=False,
                    ln=CFG_SITE_LANG):
    """
    Administrator page to search for a borrower (present in the crcBORROWER
    table) by name, email, phone or id.

    If redirect_to_new_request is False, the returned page will be
    "Borrower details"; if True, the returned page will be "New Request".
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # 'empty_barcode' carries an optional message to display (e.g. from a
    # previous failed search).
    infos = [empty_barcode] if empty_barcode else []

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    body = bc_templates.tmpl_borrower_search(infos=infos,
                        redirect_to_new_request=redirect_to_new_request,
                        ln=ln)
    title = _("New Request") if redirect_to_new_request else _("Borrower Search")
    return page(title=title,
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def borrower_search_result(req, column, string, redirect_to_new_request=False,
                           ln=CFG_SITE_LANG):
    """
    Search a borrower and return a list with all the possible results.

    @type column: string
    @param column: identify the column, of the table crcBORROWER, that will be
                   considered during the search. Can be 'name', 'email' or 'id'.
    @type string: string
    @param string: string used for the search process.

    If redirect_to_new_request is True, the returned page will be "Borrower
    details"; if False, the returned page will be "New Request".
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # An empty pattern sends the librarian back to the search form.
    if string == '':
        message = _('Empty string.') + ' ' + _('Please, try again.')
        return borrower_search(req, message, redirect_to_new_request, ln)

    result = search_user(column, string)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    # A single match skips the result list and goes straight to the
    # appropriate page.
    if len(result) == 1:
        if redirect_to_new_request:
            return create_new_request_step1(req, result[0][0])
        return get_borrower_details(req, result[0][0], False, ln)

    body = bc_templates.tmpl_borrower_search_result(result=result,
                        redirect_to_new_request=redirect_to_new_request,
                        ln=ln)
    return page(title=_("Borrower search result"),
                uid=uid,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
###
### ILL/Purchase/Acquisition related functions.
### Naming of the methods is not intuitive. Should be improved
### and appropriate documentation added, when required.
### Also, methods could be refactored.
###
def register_ill_from_proposal(req, ill_request_id, bor_id=None, ln=CFG_SITE_LANG):
    """
    Create an ILL request out of a book proposal.

    @type ill_request_id: integer.
    @param ill_request_id: identify the proposal (primary key of
                           crcILLREQUEST).
    @type bor_id: integer.
    @param bor_id: borrower the ILL is created for; when None, the borrower
                   attached to the proposal is used.
    @return: the list of new ILL requests, with a status message.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    book_info = db.get_ill_book_info(ill_request_id)
    infos = []
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    if looks_like_dictionary(book_info):
        # Book info is stored as the repr() of a dict.
        book_info = eval(book_info)
        if not bor_id:
            bid = db.get_ill_borrower(ill_request_id)
        else:
            bid = bor_id
        # "'recid' in book_info" replaces the Python-2-only dict.has_key().
        if 'recid' in book_info and bid:
            recid = book_info['recid']
            if not db.has_loan_request(bid, recid, ill=1):
                db.tag_requests_as_done(bid, recid=recid)
                library_notes = {time.strftime("%Y-%m-%d %H:%M:%S"):
                                 _("This ILL has been created from a proposal.")}
                db.register_ill_from_proposal(ill_request_id,
                                              bid, library_notes)
                infos.append(_('An ILL has been created for the user.'))
            else:
                infos.append(_('An active ILL already exists for this user on this record.'))
        else:
            infos.append(_('Could not create an ILL from the proposal'))
    else:
        infos.append(_('Could not create an ILL from the proposal'))
    ill_req = db.get_ill_requests(CFG_BIBCIRCULATION_ILL_STATUS_NEW)
    body = bc_templates.tmpl_list_ill(ill_req, infos=infos, ln=ln)
    return page(title=_("ILL requests"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_request_with_no_recid_step1(req, borrower_id,
                                             ln=CFG_SITE_LANG):
    """
    Display the form used to register an ILL request for an item that has
    no record in the catalogue (step 1).
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_register_ill_request_with_no_recid_step1(
        infos=[],
        borrower_id=borrower_id,
        admin=True, ln=ln)
    return page(title=_("Register ILL request"),
                uid=uid,
                req=req,
                metaheaderadd="<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_SECURE_URL,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_request_with_no_recid_step2(req, title, authors, place,
                        publisher, year, edition, isbn, budget_code,
                        period_of_interest_from, period_of_interest_to,
                        additional_comments, only_edition, key, string,
                        borrower_id, ln=CFG_SITE_LANG):
    """
    Register an ILL request for an item without a record (step 2).

    When no borrower is selected yet, validate the period of interest and
    search for borrower candidates matching (key, string); otherwise go
    straight to the confirmation step (step 3).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    book_info = (title, authors, place, publisher, year, edition, isbn)
    request_details = (budget_code, period_of_interest_from,
                       period_of_interest_to, additional_comments, only_edition)
    if borrower_id in (None, '', 'None'):
        body = None
        if not key:
            borrowers_list = None
        elif not string:
            infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
            borrowers_list = None
        else:
            # Reject invalid dates before running the borrower search.
            if validate_date_format(period_of_interest_from) is False:
                infos = []
                infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
                body = bc_templates.tmpl_register_ill_request_with_no_recid_step1(
                                                            infos=infos,
                                                            borrower_id=None,
                                                            admin=True,
                                                            ln=ln)
            elif validate_date_format(period_of_interest_to) is False:
                infos = []
                infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
                body = bc_templates.tmpl_register_ill_request_with_no_recid_step1(
                                                            infos=infos,
                                                            ln=ln)
            else:
                result = search_user(key, string)
                borrowers_list = []
                if len(result) == 0:
                    infos.append(_("0 borrowers found."))
                else:
                    for user in result:
                        borrower_data = db.get_borrower_data_by_id(user[0])
                        borrowers_list.append(borrower_data)
        # 'is None' instead of '== None' (identity comparison, PEP 8).
        if body is None:
            body = bc_templates.tmpl_register_ill_request_with_no_recid_step2(
                                book_info=book_info, request_details=request_details,
                                result=borrowers_list, key=key, string=string,
                                infos=infos, ln=ln)
    else:
        user_info = db.get_borrower_data_by_id(borrower_id)
        return register_ill_request_with_no_recid_step3(req, title, authors,
                                    place, publisher,year, edition,
                                    isbn, user_info, budget_code,
                                    period_of_interest_from,
                                    period_of_interest_to,
                                    additional_comments, only_edition,
                                    ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    return page(title=_("Register ILL request"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_request_with_no_recid_step3(req, title, authors, place,
                                             publisher, year, edition, isbn,
                                             user_info, budget_code,
                                             period_of_interest_from,
                                             period_of_interest_to,
                                             additional_comments,
                                             only_edition, ln=CFG_SITE_LANG):
    """
    Register an ILL request for an item without a record (step 3): show the
    confirmation form for the selected borrower, or fall back to step 2
    when no borrower was found.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    # No borrower selected: send the librarian back to the search step.
    if user_info is None:
        return register_ill_request_with_no_recid_step2(req, title, authors,
                            place, publisher, year, edition, isbn, budget_code,
                            period_of_interest_from, period_of_interest_to,
                            additional_comments, only_edition, 'name', None,
                            None, ln)

    request_details = (budget_code, period_of_interest_from,
                       period_of_interest_to, additional_comments, only_edition)
    book_info = (title, authors, place, publisher, year, edition, isbn)
    body = bc_templates.tmpl_register_ill_request_with_no_recid_step3(
                                                book_info=book_info,
                                                user_info=user_info,
                                                request_details=request_details,
                                                admin=True,
                                                ln=ln)
    return page(title=_("Register ILL request"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_request_with_no_recid_step4(req, book_info, borrower_id,
                                             request_details, ln):
    """
    Register an ILL request for an item without a record (step 4): persist
    the request for the given borrower, or redisplay step 2 with an error
    when the borrower id is unknown.

    @param book_info: (title, authors, place, publisher, year, edition, isbn)
    @param request_details: (budget_code, period_of_interest_from,
                             period_of_interest_to, library_notes,
                             only_edition)
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    _ = gettext_set_language(ln)
    (title, authors, place, publisher, year, edition, isbn) = book_info
    #create_ill_record(book_info))
    (budget_code, period_of_interest_from,
     period_of_interest_to, library_notes, only_edition) = request_details
    # Library notes are stored keyed by creation timestamp.
    ill_request_notes = {}
    if library_notes:
        ill_request_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \
                                                          str(library_notes)
    ### budget_code ###
    # 'is None' instead of '== None' (identity comparison, PEP 8).
    if db.get_borrower_data_by_id(borrower_id) is None:
        infos = []
        infos.append(_("<strong>Request not registered:</strong> wrong borrower id"))
        body = bc_templates.tmpl_register_ill_request_with_no_recid_step2(
                                book_info=book_info,
                                request_details=request_details, result=[],
                                key='name', string=None, infos=infos, ln=ln)
        return page(title=_("Register ILL request"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        book_info = {'title': title, 'authors': authors, 'place': place,
                     'publisher': publisher,'year' : year, 'edition': edition,
                     'isbn' : isbn}
        db.ill_register_request_on_desk(borrower_id, book_info,
                                        period_of_interest_from,
                                        period_of_interest_to,
                                        CFG_BIBCIRCULATION_ILL_STATUS_NEW,
                                        str(ill_request_notes),
                                        only_edition, 'book', budget_code)
        return list_ill_request(req, CFG_BIBCIRCULATION_ILL_STATUS_NEW, ln)
def register_ill_book_request(req, borrower_id, ln=CFG_SITE_LANG):
    """
    Display a form where it is possible to search for an item.
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    body = bc_templates.tmpl_register_ill_book_request(infos=[],
                                                       borrower_id=borrower_id,
                                                       ln=ln)
    return page(title=_("Register ILL Book request"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_book_request_result(req, borrower_id, p, f, ln=CFG_SITE_LANG):
    """
    Search an item and return a list with all the possible results. To retrieve
    the information desired, we use the method 'perform_request_search' (from
    search_engine.py). In the case of BibCirculation, we are just looking for
    books (items) inside the collection 'Books'.

    @type p: string
    @param p: search pattern
    @type f: string
    @param f: search field
    @return: list of recids
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []
    if p == '':
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_register_ill_book_request(infos=infos,
                                                           borrower_id=borrower_id,
                                                           ln=ln)
    elif f == 'barcode':
        # Barcode searches bypass the search engine and hit the DB directly.
        p = p.strip('\'" \t')
        recid = db.get_id_bibrec(p)
        if recid is None:
            infos.append(_('The barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s does not exist on BibCirculation database.') % {'x_barcode': p, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
            body = bc_templates.tmpl_register_ill_book_request(infos=infos,
                                                           borrower_id=borrower_id,
                                                           ln=ln)
        else:
            body = bc_templates.tmpl_register_ill_book_request_result(
                                                       result=[recid],
                                                       borrower_id=borrower_id,
                                                       ln=ln)
    else:
        result = perform_request_search(cc="Books", sc="1", p=p, f=f)
        if len(result) == 0:
            # Nothing in the catalogue: offer the no-recid registration form.
            return register_ill_request_with_no_recid_step1(req,
                                                            borrower_id, ln)
        body = bc_templates.tmpl_register_ill_book_request_result(
                                                       result=result,
                                                       borrower_id=borrower_id,
                                                       ln=ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    return page(title=_("Register ILL Book request"),
                uid=uid,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_article_request_step1(req, ln=CFG_SITE_LANG):
    """
    Display the form used to register an ILL request for an article (step 1).
    """
    uid = getUid(req)
    auth_code, auth_message = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    body = bc_templates.tmpl_register_ill_article_request_step1(infos=[],
                                                                ln=ln)
    return page(title=_("Register ILL Article request"),
                uid=uid,
                req=req,
                body=body,
                metaheaderadd="<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % (CFG_SITE_SECURE_URL),
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_article_request_step2(req, periodical_title, article_title,
                                       author, report_number, volume, issue,
                                       pages, year, budget_code, issn,
                                       period_of_interest_from,
                                       period_of_interest_to,
                                       additional_comments, key, string,
                                       ln=CFG_SITE_LANG):
    """
    Register an ILL request for an article (step 2): validate the period of
    interest, search for the borrower the request is made for, and redisplay
    the form with the candidates found.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    # A search field was chosen but no pattern given: redisplay the form.
    if key and not string:
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        article_info = (periodical_title, article_title, author, report_number,
                        volume, issue, pages, year, issn)
        request_details = (period_of_interest_from, period_of_interest_to,
                           budget_code, additional_comments)
        body = bc_templates.tmpl_register_ill_article_request_step2(
                                                article_info=article_info,
                                                request_details=request_details,
                                                result=None, key=key,
                                                string=string, infos=infos,
                                                ln=ln)
        navtrail_previous_links = '<a class="navtrail" ' \
                        'href="%s/help/admin">Admin Area' \
                        '</a> &gt; <a class="navtrail" ' \
                        'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                        'Circulation Management' \
                        '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
        return page(title=_("Register ILL request"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)
    result = search_user(key, string)
    borrowers_list = []
    if len(result) == 0 and key:
        if CFG_CERN_SITE:
            infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID."))
        else:
            new_borrower_link = create_html_link(CFG_SITE_SECURE_URL +
                                '/admin2/bibcirculation/add_new_borrower_step1',
                                {'ln': ln}, _("Register new borrower."))
            message = _("0 borrowers found.") + ' ' + new_borrower_link
            infos.append(message)
    else:
        for user in result:
            borrower_data = db.get_borrower_data_by_id(user[0])
            borrowers_list.append(borrower_data)
    # Reject invalid dates before showing the borrower candidates.
    if validate_date_format(period_of_interest_from) is False:
        infos = []
        infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos,
                                                                    ln=ln)
    elif validate_date_format(period_of_interest_to) is False:
        infos = []
        infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
        body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos,
                                                                    ln=ln)
    else:
        article_info = (periodical_title, article_title, author, report_number,
                        volume, issue, pages, year, issn)
        request_details = (period_of_interest_from, period_of_interest_to,
                           budget_code, additional_comments)
        body = bc_templates.tmpl_register_ill_article_request_step2(
                                                article_info=article_info,
                                                request_details=request_details,
                                                result=borrowers_list,
                                                key=key, string=string,
                                                infos=infos, ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    # Use the module-level page() like every other handler in this file;
    # the original referenced invenio.webpage.page directly, which breaks
    # unless the 'invenio' package itself is imported.
    return page(title=_("Register ILL request"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_ill_article_request_step3(req, periodical_title, title, authors,
                                       report_number, volume, issue,
                                       page_number, year, issn, user_info,
                                       request_details, ln=CFG_SITE_LANG):
    """
    Register an ILL request for an article (step 3): persist the request for
    the selected borrower, or fall back to step 2 when no borrower was found.

    @param user_info: borrower data row; index 0 is the borrower id.
    @param request_details: (period_of_interest_from, period_of_interest_to,
                             budget_code, library_notes)
    """
    #id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    #info = (title, authors, "", "", year, "", issn)
    #create_ill_record(info)
    item_info = {'periodical_title': periodical_title, 'title': title,
                 'authors': authors, 'place': "", 'publisher': "",
                 'year' : year, 'edition': "", 'issn' : issn,
                 'volume': volume, 'issue': issue, 'page': page_number }
    (period_of_interest_from, period_of_interest_to, budget_code,
     library_notes) = request_details
    only_edition = ""
    # No borrower selected: send the librarian back to the search step.
    if user_info is None:
        return register_ill_article_request_step2(req, periodical_title, title,
                                authors, report_number, volume, issue,
                                page_number, year, budget_code, issn,
                                period_of_interest_from,
                                period_of_interest_to,
                                library_notes, 'name', None, ln)
    else:
        borrower_id = user_info[0]
        # Library notes are stored keyed by creation timestamp.
        ill_request_notes = {}
        if library_notes:
            ill_request_notes[time.strftime("%Y-%m-%d %H:%M:%S")] \
                                                          = str(library_notes)
        db.ill_register_request_on_desk(borrower_id, item_info,
                                        period_of_interest_from,
                                        period_of_interest_to,
                                        CFG_BIBCIRCULATION_ILL_STATUS_NEW,
                                        str(ill_request_notes),
                                        only_edition, 'article', budget_code)
        return list_ill_request(req, CFG_BIBCIRCULATION_ILL_STATUS_NEW, ln)
def register_purchase_request_step1(req, request_type, recid, title, authors,
                                    place, publisher, year, edition, this_edition_only,
                                    isbn, standard_number,
                                    budget_code, cash, period_of_interest_from,
                                    period_of_interest_to, additional_comments,
                                    ln=CFG_SITE_LANG):
    """Render the first step of the purchase-request registration form."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a> &gt; <a class="navtrail" '
                               'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'
                               'Circulation Management'
                               '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln))

    # A known record only needs its identifier; otherwise carry the full
    # bibliographic description through the form.
    if recid:
        form_fields = (request_type, recid, budget_code, cash,
                       period_of_interest_from, period_of_interest_to,
                       additional_comments)
    else:
        form_fields = (request_type, title, authors, place, publisher, year, edition,
                       this_edition_only, isbn, standard_number, budget_code,
                       cash, period_of_interest_from, period_of_interest_to,
                       additional_comments)

    body = bc_templates.tmpl_register_purchase_request_step1(infos=infos,
                                    fields=form_fields, admin=True, ln=ln)

    css_link = ('<link rel="stylesheet" '
                'href="%s/img/jquery-ui.css" '
                'type="text/css" />' % CFG_SITE_SECURE_URL)

    return page(title=_("Register purchase request"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                metaheaderadd=css_link,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_purchase_request_step2(req, request_type, recid, title, authors,
                                    place, publisher, year, edition, this_edition_only,
                                    isbn, standard_number,
                                    budget_code, cash, period_of_interest_from,
                                    period_of_interest_to, additional_comments,
                                    p, f, ln=CFG_SITE_LANG):
    """Second step of purchase-request registration: validate the payment
    method and search for the borrower (pattern ``p`` in field ``f``).

    Falls back to the step-1 form when no payment method was given.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    infos = []

    # 'cash' ticked with no budget code: record the payment method as cash.
    if cash and budget_code == '':
        budget_code = 'cash'

    # A known record only needs its identifier; otherwise carry the full
    # bibliographic description through the form.
    if recid:
        fields = (request_type, recid, budget_code, cash,
                  period_of_interest_from, period_of_interest_to,
                  additional_comments)
    else:
        fields = (request_type, title, authors, place, publisher, year, edition,
                  this_edition_only, isbn, standard_number, budget_code,
                  cash, period_of_interest_from, period_of_interest_to,
                  additional_comments)

    if budget_code == '' and not cash:
        # Payment method is mandatory: bounce back to the step-1 form.
        infos.append(_("Payment method information is mandatory. \
Please, type your budget code or tick the 'cash' checkbox."))
        body = bc_templates.tmpl_register_purchase_request_step1(infos=infos,
                                                fields=fields, admin=True, ln=ln)
    else:
        if p and not f:
            infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
            body = bc_templates.tmpl_register_purchase_request_step2(
                                                infos=infos, fields=fields,
                                                result=None, p=p, f=f, ln=ln)
            navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
            # BUGFIX: the title used to read "Register ILL request" —
            # a copy/paste from the ILL workflow on a purchase page.
            return page(title=_("Register purchase request"),
                        uid=id_user,
                        req=req,
                        body=body, language=ln,
                        navtrail=navtrail_previous_links,
                        lastupdated=__lastupdated__)

        result = search_user(f, p)
        borrowers_list = []

        if len(result) == 0 and f:
            if CFG_CERN_SITE:
                infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID."))
            else:
                new_borrower_link = create_html_link(CFG_SITE_SECURE_URL +
                                '/admin2/bibcirculation/add_new_borrower_step1',
                                {'ln': ln}, _("Register new borrower."))
                message = _("0 borrowers found.") + ' ' + new_borrower_link
                infos.append(message)
        else:
            for user in result:
                borrower_data = db.get_borrower_data_by_id(user[0])
                borrowers_list.append(borrower_data)

        body = bc_templates.tmpl_register_purchase_request_step2(
                                                infos=infos, fields=fields,
                                                result=borrowers_list, p=p,
                                                f=f, ln=ln)

    return page(title=_("Register purchase request"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                metaheaderadd='<link rel="stylesheet" ' \
                              'href="%s/img/jquery-ui.css" ' \
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_purchase_request_step3(req, request_type, recid, title, authors,
                                    place, publisher, year, edition, this_edition_only,
                                    isbn, standard_number,
                                    budget_code, cash, period_of_interest_from,
                                    period_of_interest_to, additional_comments,
                                    borrower_id, ln=CFG_SITE_LANG):
    """Final step: persist the purchase request and email the borrower.

    Falls back to the step-1 form when no payment method was given;
    otherwise registers the request with status ACQ_STATUS_NEW, sends a
    confirmation email and redirects to the purchase list.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    infos = []
    # A known record only needs its identifier; otherwise carry the full
    # bibliographic description through the form.
    if recid:
        fields = (request_type, recid, budget_code, cash,
                  period_of_interest_from, period_of_interest_to,
                  additional_comments)
    else:
        fields = (request_type, title, authors, place, publisher, year, edition,
                  this_edition_only, isbn, standard_number, budget_code,
                  cash, period_of_interest_from, period_of_interest_to,
                  additional_comments)
    if budget_code == '' and not cash:
        # Payment method is mandatory: bounce back to the step-1 form.
        infos.append(_("Payment method information is mandatory. \
Please, type your budget code or tick the 'cash' checkbox."))
        body = bc_templates.tmpl_register_purchase_request_step1(infos=infos,
                                                fields=fields, admin=True, ln=ln)
    else:
        if recid:
            # NOTE: item_info is deliberately stored as the *string* repr
            # of a dict (readers elsewhere eval() it back).
            item_info = "{'recid': " + str(recid) + "}"
            title = book_title_from_MARC(recid)
        else:
            item_info = {'title': title, 'authors': authors, 'place': place,
                         'publisher': publisher, 'year' : year, 'edition': edition,
                         'isbn' : isbn, 'standard_number': standard_number}
        ill_request_notes = {}
        if additional_comments:
            # Notes are keyed by their creation timestamp.
            ill_request_notes[time.strftime("%Y-%m-%d %H:%M:%S")] \
                              = str(additional_comments)
        # 'cash' ticked with no budget code: record payment method as cash.
        if cash and budget_code == '':
            budget_code = 'cash'
        if borrower_id:
            borrower_email = db.get_borrower_email(borrower_id)
        else:
            # No explicit borrower selected: the logged-in admin is the
            # requester.
            borrower_email = db.get_invenio_user_email(id_user)
            borrower_id = db.get_borrower_id_by_email(borrower_email)
        db.ill_register_request_on_desk(borrower_id, item_info,
                                        period_of_interest_from,
                                        period_of_interest_to,
                                        CFG_BIBCIRCULATION_ACQ_STATUS_NEW,
                                        str(ill_request_notes),
                                        this_edition_only, request_type, budget_code)
        # Confirmation email to the requester.
        msg_for_user = load_template('purchase_notification') % title
        send_email(fromaddr = CFG_BIBCIRCULATION_ILLS_EMAIL,
                   toaddr = borrower_email,
                   subject = _("Your book purchase request"),
                   header = '', footer = '',
                   content = msg_for_user,
                   attempt_times=1,
                   attempt_sleeptime=10
                   )
        return redirect_to_url(req,
                '%s/admin2/bibcirculation/list_purchase?ln=%s&status=%s' % \
                (CFG_SITE_SECURE_URL, ln,
                 CFG_BIBCIRCULATION_ACQ_STATUS_NEW))
    # Only reached on the validation-failure branch above.
    return page(title=_("Register purchase request"),
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                metaheaderadd='<link rel="stylesheet" ' \
                              'href="%s/img/jquery-ui.css" ' \
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def ill_request_details_step1(req, delete_key, ill_request_id, new_status,
                              ln=CFG_SITE_LANG):
    """Show the details page of an ILL request, optionally deleting one
    library note and/or updating the request status beforehand."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []

    if delete_key and ill_request_id:
        # Notes are stored as the string repr of a dict.
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            notes = eval(db.get_ill_request_notes(ill_request_id))
            if delete_key in notes.keys():
                del notes[delete_key]
                db.update_ill_request_notes(ill_request_id, notes)

    if new_status:
        db.update_ill_request_status(ill_request_id, new_status)

    ill_request_borrower_details = \
                        db.get_ill_request_borrower_details(ill_request_id)
    if not ill_request_borrower_details:
        infos.append(_("Borrower request details not found."))

    ill_request_details = db.get_ill_request_details(ill_request_id)
    if not ill_request_details:
        infos.append(_("Request not found."))

    libraries = db.get_external_libraries()

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    title = _("ILL request details")
    if infos:
        # Something was missing: report it instead of the details form.
        body = bc_templates.tmpl_display_infos(infos, ln)
    else:
        body = bc_templates.tmpl_ill_request_details_step1(
                        ill_request_id=ill_request_id,
                        ill_request_details=ill_request_details,
                        libraries=libraries,
                        ill_request_borrower_details=ill_request_borrower_details,
                        ln=ln)

    return page(title=title,
                uid=id_user,
                req=req,
                metaheaderadd=('<link rel="stylesheet" '
                               'href="%s/img/jquery-ui.css" '
                               'type="text/css" />' % CFG_SITE_SECURE_URL),
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def ill_request_details_step2(req, delete_key, ill_request_id, new_status,
                              library_id, request_date, expected_date,
                              arrival_date, due_date, return_date,
                              cost, _currency, barcode, library_notes,
                              book_info, article_info, ln=CFG_SITE_LANG):
    """Apply the edits made on the ILL details form: maintain notes,
    update the request row and its item info, and route onwards.

    Redirects to the borrower-notification form when the status becomes
    'on loan' (the 'book arrived' email); otherwise shows the request list.
    """
    #id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if delete_key and ill_request_id:
        # NOTE(review): notes are stored as the repr() of a dict and decoded
        # with eval() — pre-existing pattern; consider ast.literal_eval.
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_previous_notes = eval(db.get_ill_request_notes(ill_request_id))
            if delete_key in library_previous_notes.keys():
                del library_previous_notes[delete_key]
                db.update_ill_request_notes(ill_request_id,
                                            library_previous_notes)
    # Reload the (possibly just-updated) notes before appending a new one.
    if db.get_ill_request_notes(ill_request_id):
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_previous_notes = eval(db.get_ill_request_notes(ill_request_id))
        else:
            library_previous_notes = {}
    else:
        library_previous_notes = {}
    if library_notes:
        # Notes are keyed by their creation timestamp.
        library_previous_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \
                                                           str(library_notes)
    if new_status == CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED:
        borrower_id = db.get_ill_borrower(ill_request_id)
        barcode = db.get_ill_barcode(ill_request_id)
        db.update_ill_loan_status(borrower_id, barcode, return_date, 'ill')
    db.update_ill_request(ill_request_id, library_id, request_date,
                          expected_date, arrival_date, due_date, return_date,
                          new_status, cost, barcode,
                          str(library_previous_notes))
    request_type = db.get_ill_request_type(ill_request_id)
    if request_type == 'book':
        item_info = book_info
    else:
        item_info = article_info
    db.update_ill_request_item_info(ill_request_id, item_info)
    if new_status == CFG_BIBCIRCULATION_ILL_STATUS_ON_LOAN:
        # Redirect to an email template when the ILL 'book' arrives
        # (Not for articles.)
        subject = _("ILL received: ")
        book_info = db.get_ill_book_info(ill_request_id)
        if looks_like_dictionary(book_info):
            book_info = eval(book_info)
        # FIX: dict.has_key() is Python-2-only (removed in Python 3);
        # the 'in' operator is the equivalent membership test.
        if 'recid' in book_info:
            subject += "'" + book_title_from_MARC(int(book_info['recid'])) + "'"
        bid = db.get_ill_borrower(ill_request_id)
        msg = load_template("ill_received")
        return redirect_to_url(req,
                               create_url(CFG_SITE_SECURE_URL +
                               '/admin2/bibcirculation/borrower_notification',
                               {'borrower_id': bid,
                                'subject': subject,
                                'load_msg_template': False,
                                'template': msg,
                                'from_address': CFG_BIBCIRCULATION_ILLS_EMAIL
                                }
                                )
                               )
    return list_ill_request(req, new_status, ln)
def purchase_details_step1(req, delete_key, ill_request_id, new_status,
                           ln=CFG_SITE_LANG):
    """Show the details page of a purchase request, optionally deleting
    one note and/or updating the request status beforehand.

    Mirrors ill_request_details_step1 but lists vendors instead of
    external libraries.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    infos = []
    if delete_key and ill_request_id:
        # Notes are stored as the string repr of a dict (decoded via eval).
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_notes = eval(db.get_ill_request_notes(ill_request_id))
            if delete_key in library_notes.keys():
                del library_notes[delete_key]
                db.update_ill_request_notes(ill_request_id, library_notes)
    if new_status:
        db.update_ill_request_status(ill_request_id, new_status)
    ill_request_borrower_details = \
                    db.get_purchase_request_borrower_details(ill_request_id)
    if ill_request_borrower_details is None \
       or len(ill_request_borrower_details) == 0:
        infos.append(_("Borrower request details not found."))
    ill_request_details = db.get_ill_request_details(ill_request_id)
    if ill_request_details is None or len(ill_request_details) == 0:
        infos.append(_("Request not found."))
    vendors = db.get_all_vendors()
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    # BUGFIX: assign the page title unconditionally. It used to be set only
    # inside the success branch, so the error branch (infos non-empty) hit
    # `return page(title=title, ...)` with `title` unbound -> NameError.
    # This matches ill_request_details_step1, which sets the title up front.
    title = _("Purchase details")
    if infos == []:
        body = bc_templates.tmpl_purchase_details_step1(
                        ill_request_id=ill_request_id,
                        ill_request_details=ill_request_details,
                        libraries=vendors,
                        ill_request_borrower_details=ill_request_borrower_details,
                        ln=ln)
    else:
        body = bc_templates.tmpl_display_infos(infos, ln)
    return page(title=title,
                uid=id_user,
                req=req,
                metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_SECURE_URL,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def purchase_details_step2(req, delete_key, ill_request_id, new_status,
                           library_id, request_date, expected_date,
                           arrival_date, due_date, return_date,
                           cost, budget_code, library_notes,
                           item_info, ln=CFG_SITE_LANG):
    """Apply the edits made on the purchase/proposal details form and route
    to the next page.

    Depending on the new status this either redirects to a borrower
    notification email (acceptance / refusal / reception), to the item
    update page, or to the matching purchase/proposal list.
    """
    #id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if delete_key and ill_request_id:
        # NOTE(review): notes are stored as the repr() of a dict and decoded
        # with eval() — pre-existing pattern; consider ast.literal_eval.
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_previous_notes = eval(db.get_ill_request_notes(ill_request_id))
            if delete_key in library_previous_notes.keys():
                del library_previous_notes[delete_key]
                db.update_ill_request_notes(ill_request_id,
                                            library_previous_notes)
    # Reload the (possibly just-updated) notes before appending a new one.
    if db.get_ill_request_notes(ill_request_id):
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_previous_notes = eval(db.get_ill_request_notes(ill_request_id))
        else:
            library_previous_notes = {}
    else:
        library_previous_notes = {}
    if library_notes:
        library_previous_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \
                                                           str(library_notes)
    if new_status == CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED:
        # NOTE(review): this lookup result is unused here (unlike the ILL
        # counterpart, which also updates the loan status) — kept as-is.
        borrower_id = db.get_ill_borrower(ill_request_id)
    db.update_purchase_request(ill_request_id, library_id, request_date,
                               expected_date, arrival_date, due_date, return_date,
                               new_status, cost, budget_code,
                               str(library_previous_notes))
    request_type = db.get_ill_request_type(ill_request_id)
    if request_type not in CFG_BIBCIRCULATION_PROPOSAL_TYPE:
        db.update_ill_request_item_info(ill_request_id, item_info)
    if new_status in (CFG_BIBCIRCULATION_PROPOSAL_STATUS_ON_ORDER,
                      CFG_BIBCIRCULATION_PROPOSAL_STATUS_PUT_ASIDE):
        barcode = db.get_ill_barcode(ill_request_id)
        if new_status == CFG_BIBCIRCULATION_PROPOSAL_STATUS_ON_ORDER:
            db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_ORDER, barcode)
            subject = _("Book suggestion accepted: ")
            template = "proposal_acceptance"
        else:
            db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_UNDER_REVIEW, barcode)
            subject = _("Book suggestion refused: ")
            template = "proposal_refusal"
        book_info = db.get_ill_book_info(ill_request_id)
        if looks_like_dictionary(book_info):
            book_info = eval(book_info)
        # FIX: dict.has_key() is Python-2-only (removed in Python 3);
        # the 'in' operator is the equivalent membership test.
        if 'recid' in book_info:
            bid = db.get_ill_borrower(ill_request_id)
            # Notify the proposer only if they still have a pending request.
            if db.has_loan_request(bid, book_info['recid']):
                subject += "'" + book_title_from_MARC(int(book_info['recid'])) + "'"
                return redirect_to_url(req,
                               create_url(CFG_SITE_SECURE_URL +
                               '/admin2/bibcirculation/borrower_notification',
                               {'borrower_id': bid,
                                'subject': subject,
                                'template': template,
                                'from_address': CFG_BIBCIRCULATION_ILLS_EMAIL
                                }
                                )
                               )
    if new_status == CFG_BIBCIRCULATION_PROPOSAL_STATUS_RECEIVED:
        barcode = db.get_ill_barcode(ill_request_id)
        # Reset the item description to the default value.
        db.set_item_description(barcode, '-')
        #db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS, barcode)
        borrower_id = db.get_ill_borrower(ill_request_id)
        recid = db.get_id_bibrec(barcode)
        if db.has_loan_request(borrower_id, recid):
            # If an ILL has already been created (after the book had been
            # put aside), there would be no waiting request by the proposer.
            db.update_loan_request_status(CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING,
                                          barcode=barcode,
                                          borrower_id=borrower_id)
        return redirect_to_url(req,
                '%s/admin2/bibcirculation/update_item_info_step4?barcode=%s' % \
                (CFG_SITE_SECURE_URL, barcode))
    if new_status == CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED:
        subject = _("Purchase received: ")
        book_info = db.get_ill_book_info(ill_request_id)
        if looks_like_dictionary(book_info):
            book_info = eval(book_info)
        # FIX: has_key() -> 'in' (see above).
        if 'recid' in book_info:
            subject += "'" + book_title_from_MARC(int(book_info['recid'])) + "'"
        bid = db.get_ill_borrower(ill_request_id)
        if budget_code == 'cash':
            msg = load_template("purchase_received_cash") % cost
        else:
            msg = load_template("purchase_received_tid") % cost
        return redirect_to_url(req,
                               create_url(CFG_SITE_SECURE_URL +
                               '/admin2/bibcirculation/borrower_notification',
                               {'borrower_id': bid,
                                'subject': subject,
                                'load_msg_template': False,
                                'template': msg,
                                'from_address': CFG_BIBCIRCULATION_ILLS_EMAIL
                                }
                                )
                               )
    if new_status in CFG_BIBCIRCULATION_ACQ_STATUS or \
       new_status == CFG_BIBCIRCULATION_PROPOSAL_STATUS_ON_ORDER:
        # The items 'on order' whether for acquisition for the library or
        # purchase on behalf of the user are displayed in the same list.
        return redirect_to_url(req,
                '%s/admin2/bibcirculation/list_purchase?ln=%s&status=%s' % \
                (CFG_SITE_SECURE_URL, ln, new_status))
    else:
        return redirect_to_url(req,
                '%s/admin2/bibcirculation/list_proposal?ln=%s&status=%s' % \
                (CFG_SITE_SECURE_URL, ln, new_status))
def get_ill_library_notes(req, ill_id, delete_key, library_notes,
                          ln=CFG_SITE_LANG):
    """Delete or append a timestamped note on an ILL, then show all notes."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if delete_key and ill_id:
        # Notes are stored as the string repr of a dict.
        if looks_like_dictionary(db.get_ill_notes(ill_id)):
            notes = eval(db.get_ill_notes(ill_id))
            if delete_key in notes.keys():
                del notes[delete_key]
                db.update_ill_notes(ill_id, notes)
    elif library_notes:
        if db.get_ill_notes(ill_id) and \
           looks_like_dictionary(db.get_ill_notes(ill_id)):
            notes = eval(db.get_ill_notes(ill_id))
        else:
            notes = {}
        # New notes are keyed by their creation timestamp.
        notes[time.strftime("%Y-%m-%d %H:%M:%S")] = str(library_notes)
        db.update_ill_notes(ill_id, notes)

    ill_notes = db.get_ill_notes(ill_id)

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("ILL notes"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_ill_notes(ill_notes=ill_notes,
                                                 ill_id=ill_id,
                                                 ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def list_ill_request(req, status, ln=CFG_SITE_LANG):
    """List all ILL requests currently in the given status."""
    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    requests = db.get_ill_requests(status)

    return page(title=_("List of ILL requests"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_list_ill(ill_req=requests, ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def list_purchase(req, status, recid=None, ln=CFG_SITE_LANG):
    """List purchase requests in the given status; when ``recid`` is given,
    restrict to the purchases of that record (item info page)."""
    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # Purchases of a particular item are displayed in the item info page.
    purchase_reqs = (db.get_item_purchases(status, recid) if recid
                     else db.get_purchases(status))

    return page(title=_("List of purchase requests"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_list_purchase(purchase_reqs, ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def list_proposal(req, status, ln=CFG_SITE_LANG):
    """List book proposals, or — for the pseudo-status
    'requests-putaside' — the requests waiting on put-aside proposals."""
    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if status == "requests-putaside":
        rows = db.get_requests_on_put_aside_proposals()
        body = bc_templates.tmpl_list_requests_on_put_aside_proposals(rows, ln=ln)
        page_title = _("List of requests on put aside proposals")
    else:
        rows = db.get_proposals(status)
        body = bc_templates.tmpl_list_proposal(rows, ln=ln)
        page_title = _("List of proposals")

    return page(title=page_title,
                uid=id_user,
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def ill_search(req, ln=CFG_SITE_LANG):
    """Display the ILL / purchase request search form."""
    infos = []

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a> &gt; <a class="navtrail" '
                               'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'
                               'Circulation Management'
                               '</a> ' % (CFG_SITE_SECURE_URL,
                                          CFG_SITE_SECURE_URL, ln))

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    css_link = ('<link rel="stylesheet" href="%s/img/jquery-ui.css" '
                'type="text/css" />' % CFG_SITE_SECURE_URL)

    return page(title=_("ILL search"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_ill_search(infos=infos, ln=ln),
                language=ln,
                metaheaderadd=css_link,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def ill_search_result(req, p, f, date_from, date_to, ln=CFG_SITE_LANG):
    """
    Search an item and return a list with all the possible results. To retrieve
    the information desired, we use the method 'perform_request_search' (from
    search_engine.py). In the case of BibCirculation, we are just looking for
    books (items) inside the collection 'Books'.

    @type p: string
    @param p: search pattern

    @type f: string
    @param f: search field ('title', 'ILL_request_ID', 'cost' or 'notes')

    @return: list of recids
    """
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    #id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # Normalise open-ended/invalid date bounds to the widest range.
    if not has_date_format(date_from):
        date_from = '0000-00-00'
    if not has_date_format(date_to):
        date_to = '9999-12-31'
    if f == 'title':
        ill_req = db.search_ill_requests_title(p, date_from, date_to)
        body = bc_templates.tmpl_list_ill(ill_req=ill_req, ln=ln)
    elif f == 'ILL_request_ID':
        ill_req = db.search_ill_requests_id(p, date_from, date_to)
        body = bc_templates.tmpl_list_ill(ill_req=ill_req, ln=ln)
    elif f == 'cost':
        purchase_reqs = db.search_requests_cost(p, date_from, date_to)
        body = bc_templates.tmpl_list_purchase(purchase_reqs=purchase_reqs, ln=ln)
    elif f == 'notes':
        purchase_reqs = db.search_requests_notes(p, date_from, date_to)
        body = bc_templates.tmpl_list_purchase(purchase_reqs=purchase_reqs, ln=ln)
    else:
        # BUGFIX: an unknown search field used to fall through with 'body'
        # unbound, raising UnboundLocalError. Show an empty list instead.
        body = bc_templates.tmpl_list_ill(ill_req=[], ln=ln)
    return page(title=_("List of ILL requests"),
                req=req,
                body=body,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
###
### "Library" related templates ###
###
def get_library_details(req, library_id, ln=CFG_SITE_LANG):
    """
    Display the details of a library.

    @type library_id: integer.
    @param library_id: identify the library. It is also the primary key of
                       the table crcLIBRARY.

    @return: library details.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ('<a class="navtrail" '
                               ' href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    library_details = db.get_library_details(library_id)
    if library_details is None:
        # Unknown id: fall back to the library search page with a notice.
        _ = gettext_set_language(ln)
        return search_library_step1(req, [_('Library ID not found.')], ln)

    library_items = db.get_library_items(library_id)

    return page(title=_("Library details"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_library_details(
                        library_details=library_details,
                        library_items=library_items,
                        ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def merge_libraries_step1(req, library_id, f=None, p=None, ln=CFG_SITE_LANG):
    """
    Step 1/3 of library merging procedure.

    @param library_id: ID of the library to be deleted
    @param p: search pattern.
    @param f: field
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ('<a class="navtrail" '
                               ' href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    library_details = db.get_library_details(library_id)
    library_items = db.get_library_items(library_id)

    result = None
    if f is not None:
        if p in (None, '', '*'):
            # Wildcard / empty pattern: show every library.
            result = db.get_all_libraries()  # list of (id, name)
        else:
            searchers = {'name': db.search_library_by_name,
                         'email': db.search_library_by_email}
            if f in searchers:
                result = searchers[f](p)

    body = bc_templates.tmpl_merge_libraries_step1(
                                            library_details=library_details,
                                            library_items=library_items,
                                            result=result,
                                            p=p,
                                            ln=ln)

    return page(title=_("Merge libraries"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def merge_libraries_step2(req, library_from, library_to, ln=CFG_SITE_LANG):
    """
    Step 2/3 of library merging procedure.
    Confirm the libraries selected.

    @param library_from: ID of the library to be deleted
    @param library_to: ID of the resulting library
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ('<a class="navtrail" '
                               ' href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    # Fetch both sides of the merge for the confirmation page.
    library_from_details = db.get_library_details(library_from)
    library_to_details = db.get_library_details(library_to)
    library_from_items = db.get_library_items(library_from)
    library_to_items = db.get_library_items(library_to)

    body = bc_templates.tmpl_merge_libraries_step2(
                                library_from_details=library_from_details,
                                library_from_items=library_from_items,
                                library_to_details=library_to_details,
                                library_to_items=library_to_items,
                                ln=ln)

    return page(title=_("Merge libraries"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def merge_libraries_step3(req, library_from, library_to, ln=CFG_SITE_LANG):
    """
    Step 3/3 of library merging procedure.
    Perform the merge and display the details of the resulting library.

    @param library_from: ID of the library to be deleted
    @param library_to: ID of the resulting library
    """
    auth = is_adminuser(req)
    if auth[0] != 0:
        return mustloginpage(req, auth[1])
    # Merge, then show the surviving library's details page.
    db.merge_libraries(library_from, library_to)
    return get_library_details(req, library_to, ln)
def add_new_library_step1(req, ln=CFG_SITE_LANG):
    """Render the empty 'add new library' form (step 1/3)."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("Add new library"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_add_new_library_step1(ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def add_new_library_step2(req, name, email, phone, address,
                          lib_type, notes, ln=CFG_SITE_LANG):
    """Show the confirmation page for a new library (step 2/3)."""
    _ = gettext_set_language(ln)
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    # Bundle the submitted values for the confirmation template.
    tup_infos = (name, email, phone, address, lib_type, notes)

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("Add new library"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_add_new_library_step2(tup_infos=tup_infos,
                                                             ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def add_new_library_step3(req, name, email, phone, address,
                          lib_type, notes, ln=CFG_SITE_LANG):
    """Persist the new library and show the confirmation page (step 3/3)."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    # Write the new library record before rendering the confirmation.
    db.add_new_library(name, email, phone, address, lib_type, notes)

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("Add new library"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_add_new_library_step3(ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_library_info_step1(req, ln=CFG_SITE_LANG):
    """Render the library-search form used to pick a library to edit."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    infos = []

    navtrail_previous_links = ('<a class="navtrail"'
                               ' href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("Update library information"),
                uid=id_user,
                req=req,
                body=bc_templates.tmpl_update_library_info_step1(infos=infos,
                                                                 ln=ln),
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_library_info_step2(req, column, string, ln=CFG_SITE_LANG):
    """Search libraries by name or email and list the matches for editing."""
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)

    if not string:
        # Empty pattern: redisplay the search form with a notice.
        infos = [_("Empty string.") + ' ' + _('Please, try again.')]
        body = bc_templates.tmpl_update_library_info_step1(infos=infos, ln=ln)
    else:
        if string == '*':
            result = db.get_all_libraries()
        elif column == 'name':
            result = db.search_library_by_name(string)
        else:
            result = db.search_library_by_email(string)
        body = bc_templates.tmpl_update_library_info_step2(result=result, ln=ln)

    navtrail_previous_links = ('<a class="navtrail" '
                               'href="%s/help/admin">Admin Area'
                               '</a>' % (CFG_SITE_SECURE_URL,))

    return page(title=_("Update library information"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_library_info_step3(req, library_id, ln=CFG_SITE_LANG):
    """Show the editable details of one library (update workflow, step 3).

    library_id -- primary key of the library in table crcLIBRARY.
    """
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    details = db.get_library_details(library_id)
    body = bc_templates.tmpl_update_library_info_step3(library_info=details,
                                                       ln=ln)
    return page(title=_("Update library information"),
                uid=user_id,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_library_info_step4(req, name, email, phone, address, lib_type,
                              library_id, ln=CFG_SITE_LANG):
    """Ask the admin to confirm the new library details (step 4)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    # Bundle the submitted values in the order the template expects.
    tup_infos = (library_id, name, email, phone, address, lib_type)
    return page(title=_("Update library information"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_update_library_info_step4(
                    tup_infos=tup_infos, ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_library_info_step5(req, name, email, phone, address, lib_type,
                              library_id, ln=CFG_SITE_LANG):
    """Persist the confirmed library details and show the result (step 5)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    db.update_library_info(library_id, name, email, phone, address, lib_type)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Update library information"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_update_library_info_step5(ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def get_library_notes(req, library_id, delete_key,
                      library_notes, ln=CFG_SITE_LANG):
    """
    Display, delete from or append to the notes attached to a library.

    library_id - identify the library. It is also the primary key of
                 the table crcLIBRARY.
    delete_key - timestamp key of an existing note to delete, if any.
    library_notes - text of a new note to append, if any.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if delete_key and library_id:
        # Notes are stored as the repr() of a dict keyed by timestamp.
        # NOTE(review): eval() of database content is dangerous; it is only
        # guarded by looks_like_dictionary() -- consider a safe parser.
        if looks_like_dictionary(db.get_library_notes(library_id)):
            lib_notes = eval(db.get_library_notes(library_id))
            if delete_key in lib_notes.keys():
                del lib_notes[delete_key]
                db.update_library_notes(library_id, lib_notes)
    elif library_notes:
        # Start from the stored dict if it parses, otherwise from scratch.
        if db.get_library_notes(library_id):
            if looks_like_dictionary(db.get_library_notes(library_id)):
                lib_notes = eval(db.get_library_notes(library_id))
            else:
                lib_notes = {}
        else:
            lib_notes = {}
        # New notes are keyed by their creation timestamp.
        lib_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = str(library_notes)
        db.update_library_notes(library_id, lib_notes)
    # Re-read so the page reflects the notes as currently stored.
    lib_notes = db.get_library_notes(library_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    body = bc_templates.tmpl_library_notes(library_notes=lib_notes,
                                           library_id=library_id,
                                           ln=ln)
    return page(title=_("Library notes"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def search_library_step1(req, infos=None, ln=CFG_SITE_LANG):
    """
    Display the form where we can search a library (by name or email).

    @param infos: informational messages shown above the form; defaults
        to no messages.  (Fixed: the previous mutable default ``infos=[]``
        was a single list object shared across calls; ``None`` is now the
        sentinel, which is backward compatible for all callers.)
    """
    if infos is None:
        infos = []
    navtrail_previous_links = '<a class="navtrail"' \
                              ' href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    body = bc_templates.tmpl_search_library_step1(infos=infos,
                                                  ln=ln)
    return page(title=_("Search library"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def search_library_step2(req, column, string, ln=CFG_SITE_LANG):
    """
    Search a library and return a list with all the possible results, using
    the parameters received from the previous step.

    column - identify the column, of the table crcLIBRARY, that will be
             considered during the search. Can be 'name' or 'email'.
    string - search term; '*' returns all libraries, an empty value
             redisplays step 1 with a warning.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if not string:
        infos = []
        # BUG FIX: message used to read "Emptry string." -- typo corrected
        # so it matches the identical message (and i18n msgid) used by the
        # other search/update handlers in this module.
        infos.append(_("Empty string.") + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_search_library_step1(infos=infos, ln=ln)
    elif string == '*':
        result = db.get_all_libraries()
        body = bc_templates.tmpl_search_library_step2(result=result, ln=ln)
    else:
        if column == 'name':
            result = db.search_library_by_name(string)
        else:
            result = db.search_library_by_email(string)
        body = bc_templates.tmpl_search_library_step2(result=result, ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                    'href="%s/help/admin">Admin Area' \
                    '</a> &gt; <a class="navtrail" ' \
                    'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">'\
                    'Circulation Management' \
                    '</a> ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)
    return page(title=_("Search library"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
###
### "Vendor" related templates ###
###
def get_vendor_details(req, vendor_id, ln=CFG_SITE_LANG):
    """Show the details of a single vendor.

    @type vendor_id: integer.
    @param vendor_id: identify the vendor. It is also the primary key of
        the table crcVENDOR.
    @return: vendor details page.
    """
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    details = db.get_vendor_details(vendor_id)
    # NOTE: the double space after the class attribute reproduces the
    # original markup byte-for-byte.
    navtrail = ('<a class="navtrail"  href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Vendor details"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_vendor_details(vendor_details=details,
                                                      ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_vendor_step1(req, ln=CFG_SITE_LANG):
    """Render the empty form for registering a new vendor (step 1)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Add new vendor"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_add_new_vendor_step1(ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_vendor_step2(req, name, email, phone, address,
                         notes, ln=CFG_SITE_LANG):
    """Ask the admin to confirm the submitted vendor data (step 2)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # Bundle the submitted values in the order the template expects.
    tup_infos = (name, email, phone, address, notes)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Add new vendor"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_add_new_vendor_step2(
                    tup_infos=tup_infos, ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def add_new_vendor_step3(req, name, email, phone, address,
                         notes, ln=CFG_SITE_LANG):
    """Persist the confirmed vendor and acknowledge the creation (step 3)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    db.add_new_vendor(name, email, phone, address, notes)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Add new vendor"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_add_new_vendor_step3(ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_vendor_info_step1(req, ln=CFG_SITE_LANG):
    """Render the lookup form of the vendor-information update workflow."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Update vendor information"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_update_vendor_info_step1(infos=[],
                                                                ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_vendor_info_step2(req, column, string, ln=CFG_SITE_LANG):
    """
    List the vendors matching a search (update workflow, step 2).

    column - crcVENDOR column to search: 'name' or 'email'.
    string - search term; '*' lists every vendor, an empty value
             redisplays step 1 with a warning.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if not string:
        infos = []
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_update_vendor_info_step1(infos=infos, ln=ln)
    elif string == '*':
        result = db.get_all_vendors()
        body = bc_templates.tmpl_update_vendor_info_step2(result=result, ln=ln)
    else:
        if column == 'name':
            result = db.search_vendor_by_name(string)
        else:
            result = db.search_vendor_by_email(string)
        body = bc_templates.tmpl_update_vendor_info_step2(result=result, ln=ln)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    # BUG FIX: removed a duplicated, unconditional
    # "body = bc_templates.tmpl_update_vendor_info_step2(result=result, ...)"
    # line here: when 'string' was empty, 'result' was never assigned and
    # the stray line raised NameError, and it also clobbered the step-1
    # warning page in that branch.
    return page(title=_("Update vendor information"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_vendor_info_step3(req, vendor_id, ln=CFG_SITE_LANG):
    """
    Update the vendor's information: show the editable details (step 3).

    vendor_id - identify the vendor. It is also the primary key of
                the table crcVENDOR.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    vendor_info = db.get_vendor_details(vendor_id)
    navtrail_previous_links = '<a class="navtrail"' \
                              ' href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_update_vendor_info_step3(vendor_info=vendor_info,
                                                      ln=ln)
    return page(title=_("Update vendor information"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def update_vendor_info_step4(req, name, email, phone, address,
                             vendor_id, ln=CFG_SITE_LANG):
    """Ask the admin to confirm the new vendor details (step 4)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    # Bundle the submitted values in the order the template expects.
    tup_infos = (vendor_id, name, email, phone, address)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Update vendor information"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_update_vendor_info_step4(
                    tup_infos=tup_infos, ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def update_vendor_info_step5(req, name, email, phone, address,
                             vendor_id, ln=CFG_SITE_LANG):
    """
    Update the vendor's information: persist the new values and confirm
    (step 5).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    db.update_vendor_info(vendor_id, name, email, phone, address)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_update_vendor_info_step5(ln=ln)
    return page(title=_("Update vendor information"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def get_vendor_notes(req, vendor_id, add_notes, new_note, ln=CFG_SITE_LANG):
    """
    Retrieve notes related with a vendor, optionally appending a new one.

    vendor_id - identify the vendor. It is also the primary key of
                the table crcVENDOR.
    @param add_notes: display the textarea where a new note will be written.
    @param new_note: note that will be added to the other vendor's notes.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if new_note:
        # Vendor notes are stored as plain text, one timestamped line each
        # (unlike library notes, which are stored as a dict repr).
        date = '[' + time.ctime() + '] '
        new_line = '\n'
        new_note = date + new_note + new_line
        db.add_new_vendor_note(new_note, vendor_id)
    # Re-read so the page reflects the notes as currently stored.
    vendor_notes = db.get_vendor_notes(vendor_id)
    navtrail_previous_links = '<a class="navtrail" ' \
                              'href="%s/help/admin">Admin Area' \
                              '</a>' % (CFG_SITE_SECURE_URL,)
    body = bc_templates.tmpl_vendor_notes(vendor_notes=vendor_notes,
                                          vendor_id=vendor_id,
                                          add_notes=add_notes,
                                          ln=ln)
    return page(title=_("Vendor notes"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def search_vendor_step1(req, ln=CFG_SITE_LANG):
    """Render the vendor search form (by name or email)."""
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Search vendor"),
                uid=user_id,
                req=req,
                body=bc_templates.tmpl_search_vendor_step1(infos=[], ln=ln),
                language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
def search_vendor_step2(req, column, string, ln=CFG_SITE_LANG):
    """List the vendors matching the search submitted in step 1.

    column -- crcVENDOR column to search: 'name' or 'email'.
    string -- search term; '*' lists every vendor, an empty value
              redisplays step 1 with a warning.
    """
    user_id = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)
    _ = gettext_set_language(ln)
    if not string:
        warnings = [_('Empty string.') + ' ' + _('Please, try again.')]
        body = bc_templates.tmpl_search_vendor_step1(infos=warnings,
                                                     ln=ln)
    else:
        if string == '*':
            result = db.get_all_vendors()
        elif column == 'name':
            result = db.search_vendor_by_name(string)
        else:
            result = db.search_vendor_by_email(string)
        body = bc_templates.tmpl_search_vendor_step2(result=result, ln=ln)
    navtrail = ('<a class="navtrail" href="%s/help/admin">Admin Area</a>'
                % (CFG_SITE_SECURE_URL,))
    return page(title=_("Search vendor"),
                uid=user_id,
                req=req,
                body=body, language=ln,
                navtrail=navtrail,
                lastupdated=__lastupdated__)
diff --git a/invenio/legacy/bibdocfile/api.py b/invenio/legacy/bibdocfile/api.py
index efeac0953..aa58e2eb6 100644
--- a/invenio/legacy/bibdocfile/api.py
+++ b/invenio/legacy/bibdocfile/api.py
@@ -1,4843 +1,4843 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This module implements the low-level API for dealing with fulltext files.
- All the files associated to a I{record} (identified by a I{recid}) can be
managed via an instance of the C{BibRecDocs} class.
- A C{BibRecDocs} is a wrapper of the list of I{documents} attached to the
record.
- Each document is represented by an instance of the C{BibDoc} class.
- A document is identified by a C{docid} and name (C{docname}). The docname
must be unique within the record. A document is the set of all the
formats and revisions of a piece of information.
- A document has a type called C{doctype} and can have a restriction.
- Each physical file, i.e. the concretization of a document into a
particular I{version} and I{format} is represented by an instance of the
C{BibDocFile} class.
- The format is in fact the extension of the physical file.
- A comment and a description and other information can be associated to a
BibDocFile.
- A C{bibdoc} is a synonym for a document, while a C{bibdocfile} is a
synonym for a physical file.
@group Main classes: BibRecDocs,BibDoc,BibDocFile
@group Other classes: BibDocMoreInfo,Md5Folder,InvenioBibDocFileError
@group Main functions: decompose_file,stream_file,bibdocfile_*,download_url
@group Configuration Variables: CFG_*
"""
__revision__ = "$Id$"
import os
import re
import shutil
import filecmp
import time
import random
import socket
import urllib2
import urllib
import tempfile
import cPickle
import base64
import binascii
import cgi
import sys
try:
import magic
if hasattr(magic, "open"):
CFG_HAS_MAGIC = 1
if not hasattr(magic, "MAGIC_MIME_TYPE"):
## Patching RHEL6/CentOS6 version
magic.MAGIC_MIME_TYPE = 16
elif hasattr(magic, "Magic"):
CFG_HAS_MAGIC = 2
except ImportError:
CFG_HAS_MAGIC = 0
from datetime import datetime
from mimetypes import MimeTypes
from thread import get_ident
from invenio.utils import apache
## Let's set a reasonable timeout for URL request (e.g. FFT)
socket.setdefaulttimeout(40)
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
from invenio.shellutils import escape_shell_arg
from invenio.dbquery import run_sql, DatabaseError
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.legacy.bibrecord import record_get_field_instances, \
field_get_subfield_values, field_get_subfield_instances, \
encode_for_xml
from invenio.utils.url import create_url, make_user_agent_string
from invenio.utils.text import nice_size
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_admin import acc_is_user_in_role, acc_get_role_id
from invenio.access_control_firerole import compile_role_definition, acc_firerole_check_user
from invenio.access_control_config import SUPERADMINROLE, CFG_WEBACCESS_WARNING_MSGS
from invenio.config import CFG_SITE_URL, \
CFG_WEBDIR, CFG_BIBDOCFILE_FILEDIR,\
CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \
CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL, \
CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS, \
CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_PATH_MD5SUM, \
CFG_WEBSUBMIT_STORAGEDIR, \
CFG_BIBDOCFILE_USE_XSENDFILE, \
CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY, \
CFG_SITE_RECORD, CFG_PYLIBDIR, \
CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS, \
CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE, \
CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES, \
CFG_BIBDOCFILE_ADDITIONAL_KNOWN_MIMETYPES
from invenio.bibdocfile_config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE, \
CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT
from invenio.base.utils import import_submodules_from_packages
from invenio.utils.hash import md5
import invenio.template
bibdocfile_templates = invenio.template.load('bibdocfile')
## The following flag controls whether HTTP range requests are supported or not
## when serving static files via Python. This is disabled by default as
## it currently breaks support for opening PDF files on Windows platforms
## using the Acrobat reader browser plugin.
CFG_ENABLE_HTTP_RANGE_REQUESTS = False
#: block size when performing I/O.
CFG_BIBDOCFILE_BLOCK_SIZE = 1024 * 8
#: threshold used to decide when to use the Python MD5 or the CLI MD5 algorithm.
CFG_BIBDOCFILE_MD5_THRESHOLD = 256 * 1024
#: chunks loaded by the Python MD5 algorithm.
CFG_BIBDOCFILE_MD5_BUFFER = 1024 * 1024
#: whether to normalize e.g. ".JPEG" and ".jpg" into .jpeg.
CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION = False
#: flags that can be associated to files.
CFG_BIBDOCFILE_AVAILABLE_FLAGS = (
'PDF/A',
'STAMPED',
'PDFOPT',
'HIDDEN',
'CONVERTED',
'PERFORM_HIDE_PREVIOUS',
'OCRED'
)
DBG_LOG_QUERIES = False
#: constant used if FFT correct with the obvious meaning.
KEEP_OLD_VALUE = 'KEEP-OLD-VALUE'
_CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [(re.compile(_regex), _headers)
for _regex, _headers in CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS]
_mimes = MimeTypes(strict=False)
_mimes.suffix_map.update({'.tbz2' : '.tar.bz2'})
_mimes.encodings_map.update({'.bz2' : 'bzip2'})
if CFG_BIBDOCFILE_ADDITIONAL_KNOWN_MIMETYPES:
for key, value in CFG_BIBDOCFILE_ADDITIONAL_KNOWN_MIMETYPES.iteritems():
_mimes.add_type(key, value)
del key, value
_magic_cookies = {}
if CFG_HAS_MAGIC == 1:
    # "file"-style libmagic bindings (magic.open API).
    def _get_magic_cookies():
        """
        @return: a mapping of flag -> loaded magic cookie for this thread.
        @rtype: dict keyed by MAGIC_NONE, MAGIC_COMPRESS, MAGIC_MIME,
            MAGIC_COMPRESS + MAGIC_MIME and MAGIC_MIME_TYPE
        @note: ... not real magic. Just see: man file(1)
        """
        thread_id = get_ident()
        # One cookie set per thread, cached in the module-level
        # _magic_cookies dict (presumably because libmagic handles are
        # not thread-safe -- TODO confirm).
        if thread_id not in _magic_cookies:
            _magic_cookies[thread_id] = {
                magic.MAGIC_NONE: magic.open(magic.MAGIC_NONE),
                magic.MAGIC_COMPRESS: magic.open(magic.MAGIC_COMPRESS),
                magic.MAGIC_MIME: magic.open(magic.MAGIC_MIME),
                magic.MAGIC_COMPRESS + magic.MAGIC_MIME: magic.open(magic.MAGIC_COMPRESS + magic.MAGIC_MIME),
                magic.MAGIC_MIME_TYPE: magic.open(magic.MAGIC_MIME_TYPE),
            }
            for key in _magic_cookies[thread_id].keys():
                _magic_cookies[thread_id][key].load()
        return _magic_cookies[thread_id]
elif CFG_HAS_MAGIC == 2:
    # Object-oriented python-magic bindings (magic.Magic API).
    def _magic_wrapper(local_path, mime=True, mime_encoding=False):
        """Return magic's description of local_path, caching one
        magic.Magic instance per (thread, mime, mime_encoding) tuple."""
        thread_id = get_ident()
        if (thread_id, mime, mime_encoding) not in _magic_cookies:
            magic_object = _magic_cookies[thread_id, mime, mime_encoding] = magic.Magic(mime=mime, mime_encoding=mime_encoding)
        else:
            magic_object = _magic_cookies[thread_id, mime, mime_encoding]
        return magic_object.from_file(local_path) # pylint: disable=E1103
def _generate_extensions():
    """
    Compile one regular expression matching every known file extension.

    The known extensions are gathered from the mimetypes tables plus
    CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS, dot-prefixed,
    lower-cased and joined into an anchored, case-insensitive
    alternation (with '+' escaped).

    @return: the compiled regular expression.
    @rtype: regular expression object
    """
    known = (_mimes.encodings_map.keys() +
             _mimes.suffix_map.keys() +
             _mimes.types_map[1].keys() +
             CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS)
    dotted = []
    for ext in known:
        dotted.append(ext if ext.startswith('.') else '.' + ext)
    dotted.sort()
    dotted.reverse()
    alternatives = set([ext.lower() for ext in dotted])
    pattern = '\\' + '$|\\'.join(alternatives) + '$'
    pattern = pattern.replace('+', '\\+')
    return re.compile(pattern, re.I)
#: Regular expression to recognized extensions.
_extensions = _generate_extensions()
class InvenioBibDocFileError(Exception):
    """Base exception for errors related to fulltext files."""
class InvenioBibdocfileUnauthorizedURL(InvenioBibDocFileError):
    """Raised when a fulltext URL may not be accessed."""
    ## NOTE: this is a legacy Exception
def _val_or_null(val, eq_name = None, q_str = None, q_args = None):
"""
Auxiliary function helpful while building WHERE clauses of SQL queries
that should contain field=val or field is val
If optional parameters q_str and q_args are provided, lists are updated
if val == None, a statement of the form "eq_name is Null" is returned
otherwise, otherwise the function returns a parametrised comparison
"eq_name=%s" with val as an argument added to the query args list.
Using parametrised queries diminishes the likelihood of having
SQL injection.
@param val Value to compare with
@type val
@param eq_name The name of the database column
@type eq_name string
@param q_str Query string builder - list of clauses
that should be connected by AND operator
@type q_str list
@param q_args Query arguments list. This list will be applied as
a second argument of run_sql command
@type q_args list
@result string of a single part of WHERE clause
@rtype string
"""
res = ""
if eq_name != None:
res += eq_name
if val == None:
if eq_name != None:
res += " is "
res += "NULL"
if q_str != None:
q_str.append(res)
return res
else:
if eq_name != None:
res += "="
res += "%s"
if q_str != None:
q_str.append(res)
if q_args != None:
q_args.append(str(val))
return res
def _sql_generate_conjunctive_where(to_process):
    """
    Build the WHERE clause of an SQL statement as a conjunction of the
    declared terms.  Each term maps a column either to "is NULL" (when the
    value is None) or to a parametrised "=%s" comparison, decreasing the
    chance of SQL injection.

    @param to_process: list of (value, database_column) tuples
    @type to_process: list
    @return: (where_clause, argument_list) tuple
    """
    clauses = []
    arguments = []
    for value, column in to_process:
        clauses.append(_val_or_null(value, eq_name=column, q_args=arguments))
    return (" AND ".join(clauses), arguments)
def file_strip_ext(afile, skip_version=False, only_known_extensions=False, allow_subformat=True):
    """
    Strip the extension from a filename in the best possible way.

    Known (possibly compound, e.g. '.tar.gz') extensions are stripped
    repeatedly; when only_known_extensions is False an unknown trailing
    '.suffix' is stripped too via os.path.splitext.

    @param afile: the path/name of a file.
    @type afile: string
    @param skip_version: whether to drop a trailing ";version".
    @type skip_version: bool
    @param only_known_extensions: whether to strip out only known extensions
        or to consider as extension anything that follows a dot.
    @type only_known_extensions: bool
    @param allow_subformat: whether to consider also subformats as part of
        the extension (they are dropped together with the version).
    @type allow_subformat: bool
    @return: the name/path without the extension (and version).
    @rtype: string
    """
    if skip_version or allow_subformat:
        afile = afile.split(';')[0]
    stripped = _extensions.sub('', afile)
    if stripped == afile and not only_known_extensions:
        stripped = os.path.splitext(afile)[0]
    # Keep stripping until a fixed point is reached.
    while stripped != afile:
        afile, stripped = stripped, _extensions.sub('', stripped)
    return stripped
def normalize_format(docformat, allow_subformat=True):
    """
    Normalize the format, e.g. by adding a dot in front.

    @param docformat: the format/extension to be normalized.
    @type docformat: string
    @param allow_subformat: whether to consider also subformats as part of
        the extension.
    @type allow_subformat: bool
    @return: the normalized format.
    @rtype: string
    """
    # BUG FIX: the previous code split on rfind(';') even when no ';' was
    # present (rfind returns -1), so the last character of a bare extension
    # was silently treated as the subformat.  That skipped the leading-dot
    # fix-up for one-character extensions and broke the strong-normalization
    # lower-casing/aliasing below (e.g. '.JPG' came out as '.jpG').
    if allow_subformat and ';' in docformat:
        subformat = docformat[docformat.rfind(';'):]
        docformat = docformat[:docformat.rfind(';')]
    else:
        subformat = ''
    if docformat and docformat[0] != '.':
        docformat = '.' + docformat
    if CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION:
        # Preserve the case of a few historically case-sensitive formats.
        if docformat not in ('.Z', '.H', '.C', '.CC'):
            docformat = docformat.lower()
        docformat = {
            '.jpg' : '.jpeg',
            '.htm' : '.html',
            '.tif' : '.tiff'
        }.get(docformat, docformat)
    return docformat + subformat
def guess_format_from_url(url):
    """
    Given a URL tries to guess it's extension.

    Different methods will be used, including HTTP HEAD query,
    downloading the resource and using mime.

    @param url: the URL for which the extension should be guessed.
    @type url: string
    @return: the recognized extension or '.bin' if it's impossible to
        recognize it.
    @rtype: string
    """
    def guess_via_magic(local_path):
        # Returns a normalized extension guessed by libmagic, or None
        # (implicitly) when magic is unavailable or anything fails.
        try:
            if CFG_HAS_MAGIC == 1:
                magic_cookie = _get_magic_cookies()[magic.MAGIC_MIME_TYPE]
                mimetype = magic_cookie.file(local_path)
            elif CFG_HAS_MAGIC == 2:
                mimetype = _magic_wrapper(local_path, mime=True, mime_encoding=False)
            if CFG_HAS_MAGIC:
                ext = _mimes.guess_extension(mimetype)
                if ext:
                    ## Normalize some common magic mis-interpretations
                    ext = {'.asc': '.txt', '.obj': '.bin'}.get(ext, ext)
                    return normalize_format(ext)
        except Exception:
            # Best-effort guesser: swallow any magic failure and fall back.
            pass
    ## Let's try to guess the extension by considering the URL as a filename
    ext = decompose_file(url, skip_version=True, only_known_extensions=True)[2]
    if ext.startswith('.'):
        return ext
    if is_url_a_local_file(url):
        ## The URL corresponds to a local file, so we can safely consider
        ## traditional extensions after the dot.
        ext = decompose_file(url, skip_version=True, only_known_extensions=False)[2]
        if ext.startswith('.'):
            return ext
        ## No extensions? Let's use Magic.
        ext = guess_via_magic(url)
        if ext:
            return ext
    else:
        ## Since the URL is remote, let's try to perform a HEAD request
        ## and see the corresponding headers
        try:
            response = open_url(url, head_request=True)
        except (InvenioBibdocfileUnauthorizedURL, urllib2.URLError):
            return ".bin"
        ext = get_format_from_http_response(response)
        if ext:
            return ext
        if CFG_HAS_MAGIC:
            ## Last solution: let's download the remote resource
            ## and use the Python magic library to guess the extension
            filename = ""
            try:
                try:
                    filename = download_url(url, docformat='')
                    ext = guess_via_magic(filename)
                    if ext:
                        return ext
                except Exception:
                    pass
            finally:
                if os.path.exists(filename):
                    ## Let's free space
                    os.remove(filename)
    return ".bin"
#: Characters that would be filtered out of docnames if normalization
#: were active (currently unused -- see normalize_docname below).
_docname_re = re.compile(r'[^-\w.]*')
def normalize_docname(docname):
    """
    Normalize the docname.

    Currently an identity operation: the docname is returned unchanged.
    The commented-out line documents the once-planned character filtering.

    @param docname: the docname to be normalized.
    @type docname: string
    @return: the normalized docname.
    @rtype: string
    """
    #return _docname_re.sub('', docname)
    return docname
def normalize_version(version):
    """
    Normalize the version.

    The version can be either an integer or the keyword 'all' (any
    case/whitespace variant).  Any other value is mapped to the empty
    string.

    @param version: the version (either a number or 'all').
    @type version: integer or string
    @return: the normalized version.
    @rtype: string
    """
    try:
        int(version)
    except ValueError:
        # Not numeric: accept only the 'all' keyword.
        return 'all' if version.lower().strip() == 'all' else ''
    return str(version)
def compose_file(dirname, extension, subformat=None, version=None, storagename=None):
    """
    Construct back a fullpath given the separate components.

    @param dirname: directory holding the file
    @param extension: extension, with or without the leading dot
    @param subformat: optional subformat, with or without the leading ';'
    @param version: optional version number, appended as ';N'
    @param storagename: name under which the file should be stored in the
        filesystem; defaults to 'content'
    @type storagename: string
    @return: a fullpath to the file
    @rtype: string
    """
    version_part = ";%i" % int(version) if version else ""
    if subformat:
        subformat_part = (subformat if subformat.startswith(";")
                          else ";%s" % subformat)
    else:
        subformat_part = ""
    ext = extension
    if ext and not ext.startswith("."):
        ext = ".%s" % ext
    basename = storagename if storagename else "content"
    return os.path.join(dirname, basename + ext + subformat_part + version_part)
def compose_format(extension, subformat=None):
    """
    Construct the format string '.extension[;subformat]'.

    The leading dot and the ';' separator are added when missing.
    """
    ext = extension if extension.startswith(".") else ".%s" % extension
    if not subformat:
        return ext
    return ext + (subformat if subformat.startswith(";")
                  else ";%s" % subformat)
def decompose_file(afile, skip_version=False, only_known_extensions=False,
                   allow_subformat=True):
    """
    Decompose a file/path into its components dirname, basename and extension.

    >>> decompose_file('/tmp/foo.tar.gz')
    ('/tmp', 'foo', '.tar.gz')
    >>> decompose_file('/tmp/foo.tar.gz;1', skip_version=True)
    ('/tmp', 'foo', '.tar.gz')
    >>> decompose_file('http://www.google.com/index.html')
    ('http://www.google.com', 'index', '.html')

    @param afile: the path/name of a file.
    @type afile: string
    @param skip_version: whether to skip a trailing ";version".
    @type skip_version: bool
    @param only_known_extensions: whether to strip out only known extensions
        or to consider as extension anything that follows a dot.
    @type only_known_extensions: bool
    @param allow_subformat: whether to consider also subformats as part of
        the extension.
    @type allow_subformat: bool
    @return: a tuple with the directory name, the basename and extension.
    @rtype: (dirname, basename, extension)
    @note: if a URL is provided, the scheme will be part of the dirname.
    @see: L{file_strip_ext} for the algorithm used to retrieve the extension.
    """
    if skip_version:
        # Drop a trailing ';N' only when N is actually an integer.
        version = afile.split(';')[-1]
        try:
            int(version)
            afile = afile[:-len(version)-1]
        except ValueError:
            pass
    basename = os.path.basename(afile)
    dirname = afile[:-len(basename)-1]
    base = file_strip_ext(
        basename,
        only_known_extensions=only_known_extensions,
        allow_subformat=allow_subformat)
    # Whatever follows the stripped base (after the dot) is the extension.
    extension = basename[len(base) + 1:]
    if extension:
        extension = '.' + extension
    return (dirname, base, extension)
def decompose_file_with_version(afile):
    """
    Split a file path into (dirname, basename, extension, version).

    >>> decompose_file_with_version('/tmp/foo.tar.gz;1')
    ('/tmp', 'foo', '.tar.gz', 1)

    @param afile: the path/name of a file.
    @type afile: string
    @return: a tuple with the directory name, the basename, extension and
        version.
    @rtype: (dirname, basename, extension, version)
    @raise ValueError: when no trailing numeric version is present.
    @note: for a URL, the scheme ends up inside the dirname.
    """
    version_token = afile.split(';')[-1]
    # this raises ValueError when the path carries no ";version" suffix
    version = int(version_token)
    afile = afile[:-len(version_token) - 1]
    tail = os.path.basename(afile)
    head = afile[:-len(tail) - 1]
    stripped = file_strip_ext(tail)
    ext = tail[len(stripped) + 1:]
    if ext:
        ext = '.' + ext
    return (head, stripped, ext, version)
def get_subformat_from_format(docformat):
    """
    Return the subformat part of a format string (text after the last ';'),
    or the empty string when no subformat is present.

    >>> get_subformat_from_format('foo;bar')
    'bar'
    >>> get_subformat_from_format('foo')
    ''
    """
    if ';' in docformat:
        return docformat.rsplit(';', 1)[1]
    return ''
def get_superformat_from_format(docformat):
    """
    Return the superformat part of a format string (text before the last ';'),
    or the whole string when no subformat is present.

    >>> get_superformat_from_format('foo;bar')
    'foo'
    >>> get_superformat_from_format('foo')
    'foo'
    """
    if ';' in docformat:
        return docformat.rsplit(';', 1)[0]
    return docformat
def propose_next_docname(docname):
    """
    Given a I{docname}, suggest a new I{docname} (useful when trying to
    generate a unique I{docname}).

    >>> propose_next_docname('foo')
    'foo_1'
    >>> propose_next_docname('foo_1')
    'foo_2'
    >>> propose_next_docname('foo_10')
    'foo_11'

    @param docname: the base docname.
    @type docname: string
    @return: the next possible docname based on the given one.
    @rtype: string
    """
    prefix, underscore, suffix = docname.rpartition('_')
    if underscore:
        try:
            # bump the trailing counter when the suffix is numeric
            return '%s_%i' % (prefix, int(suffix) + 1)
        except ValueError:
            pass
    return docname + '_1'
class BibRecDocs(object):
    """
    This class represents all the files attached to one record.

    @param recid: the record identifier.
    @type recid: integer
    @param deleted_too: whether to consider deleted documents as normal
        documents (useful when trying to recover deleted information).
    @type deleted_too: bool
    @param human_readable: whether numbers should be printed in human readable
        format (e.g. 2048 bytes -> 2Kb)
    @ivar id: the record identifier as passed to the constructor.
    @type id: integer
    @ivar human_readable: the human_readable flag as passed to the constructor.
    @type human_readable: bool
    @ivar deleted_too: the deleted_too flag as passed to the constructor.
    @type deleted_too: bool
    @ivar bibdocs: the documents attached to the record.
    @type bibdocs: dict mapping docname -> (BibDoc, attachment type)
    """
    def __init__(self, recid, deleted_too=False, human_readable=False):
        """
        Initialize the object and load the list of attached documents
        from the database (see L{build_bibdoc_list}).
        @raise ValueError: if C{recid} cannot be converted to an integer.
        """
        try:
            self.id = int(recid)
        except ValueError:
            raise ValueError("BibRecDocs: recid is %s but must be an integer." % repr(recid))
        self.human_readable = human_readable
        self.deleted_too = deleted_too
        self.bibdocs = {}
        self.attachment_types = {} # dictionary docname->attachment type
        self.build_bibdoc_list()
def __repr__(self):
"""
@return: the canonical string representation of the C{BibRecDocs}.
@rtype: string
"""
return 'BibRecDocs(%s%s%s)' % (self.id,
self.deleted_too and ', True' or '',
self.human_readable and ', True' or ''
)
def __str__(self):
"""
@return: an easy to be I{grepped} string representation of the
whole C{BibRecDocs} content.
@rtype: string
"""
out = '%i::::total bibdocs attached=%i\n' % (self.id, len(self.bibdocs))
out += '%i::::total size latest version=%s\n' % (self.id, nice_size(self.get_total_size_latest_version()))
out += '%i::::total size all files=%s\n' % (self.id, nice_size(self.get_total_size()))
for (docname, (bibdoc, dummy)) in self.bibdocs.items():
out += str(docname) + ":" + str(bibdoc)
return out
def empty_p(self):
"""
@return: True when the record has no attached documents.
@rtype: bool
"""
return len(self.bibdocs) == 0
    def deleted_p(self):
        """
        @return: True if the corresponding record has been deleted.
        @rtype: bool
        """
        from invenio.search_engine import record_exists
        # -1 is the record_exists() return value that flags a deleted record
        return record_exists(self.id) == -1
    def get_xml_8564(self):
        """
        Return a snippet of I{MARCXML} representing the I{8564} fields
        corresponding to the current state.
        @return: the MARCXML representation.
        @rtype: string
        """
        from invenio.search_engine import get_record
        out = ''
        record = get_record(self.id)
        fields = record_get_field_instances(record, '856', '4', ' ')
        for field in fields:
            urls = field_get_subfield_values(field, 'u')
            # preserve existing 856 fields that point to external
            # (non-bibdocfile) URLs; bibdocfile-managed ones are rebuilt below
            if urls and not bibdocfile_url_p(urls[0]):
                out += '\t<datafield tag="856" ind1="4" ind2=" ">\n'
                for subfield, value in field_get_subfield_instances(field):
                    out += '\t\t<subfield code="%s">%s</subfield>\n' % (subfield, encode_for_xml(value))
                out += '\t</datafield>\n'
        # emit one 856 field per latest, non-hidden attached file
        for afile in self.list_latest_files(list_hidden=False):
            out += '\t<datafield tag="856" ind1="4" ind2=" ">\n'
            url = afile.get_url()
            description = afile.get_description()
            comment = afile.get_comment()
            if url:
                out += '\t\t<subfield code="u">%s</subfield>\n' % encode_for_xml(url)
            if description:
                out += '\t\t<subfield code="y">%s</subfield>\n' % encode_for_xml(description)
            if comment:
                out += '\t\t<subfield code="z">%s</subfield>\n' % encode_for_xml(comment)
            out += '\t</datafield>\n'
        return out
def get_total_size_latest_version(self):
"""
Returns the total size used on disk by all the files belonging
to this record and corresponding to the latest version.
@return: the total size.
@rtype: integer
"""
size = 0
for (bibdoc, _) in self.bibdocs.values():
size += bibdoc.get_total_size_latest_version()
return size
def get_total_size(self):
"""
Return the total size used on disk of all the files belonging
to this record of any version (not only the last as in
L{get_total_size_latest_version}).
@return: the total size.
@rtype: integer
"""
size = 0
for (bibdoc, _) in self.bibdocs.values():
size += bibdoc.get_total_size()
return size
    def build_bibdoc_list(self):
        """
        This method must be called everytime a I{bibdoc} is added, removed or
        modified.
        It rebuilds C{self.bibdocs} (docname -> (BibDoc, type)) from the
        database.
        """
        self.bibdocs = {}
        if self.deleted_too:
            # include also documents whose status is DELETED
            res = run_sql("""SELECT brbd.id_bibdoc, brbd.docname, brbd.type FROM bibrec_bibdoc as brbd JOIN
                bibdoc as bd ON bd.id=brbd.id_bibdoc WHERE brbd.id_bibrec=%s
                ORDER BY brbd.docname ASC""", (self.id,))
        else:
            res = run_sql("""SELECT brbd.id_bibdoc, brbd.docname, brbd.type FROM bibrec_bibdoc as brbd JOIN
                bibdoc as bd ON bd.id=brbd.id_bibdoc WHERE brbd.id_bibrec=%s AND
                bd.status<>'DELETED' ORDER BY brbd.docname ASC""", (self.id,))
        for row in res:
            cur_doc = BibDoc.create_instance(docid=row[0], recid=self.id,
                                             human_readable=self.human_readable)
            self.bibdocs[row[1]] = (cur_doc, row[2])
def list_bibdocs_by_names(self, doctype=None):
"""
Returns the dictionary of all bibdocs object belonging to a recid.
Keys in the dictionary are names of documetns and values are BibDoc objects.
If C{doctype} is set, it returns just the bibdocs of that doctype.
@param doctype: the optional doctype.
@type doctype: string
@return: the dictionary of bibdocs.
@rtype: dictionary of Dcname -> BibDoc
"""
if not doctype:
return dict((k,v) for (k,(v,_)) in self.bibdocs.iteritems())
res = {}
for (docname, (doc, attachmenttype)) in self.bibdocs.iteritems():
if attachmenttype == doctype:
res[docname] = doc
return res
def list_bibdocs(self, doctype=None):
"""
Returns the list all bibdocs object belonging to a recid.
If C{doctype} is set, it returns just the bibdocs of that doctype.
@param doctype: the optional doctype.
@type doctype: string
@return: the list of bibdocs.
@rtype: list of BibDoc
"""
if not doctype:
return [d for (d,_) in self.bibdocs.values()]
else:
return [bibdoc for (bibdoc, attype) in self.bibdocs.values() if doctype == attype]
def get_bibdoc_names(self, doctype=None):
"""
Returns all the names of the documents associated with the bibrec.
If C{doctype} is set, restrict the result to all the matching doctype.
@param doctype: the optional doctype.
@type doctype: string
@return: the list of document names.
@rtype: list of string
"""
return [docname for (docname, dummy) in self.list_bibdocs_by_names(doctype).items()]
    def check_file_exists(self, path, f_format):
        """
        Check if a file with the same content of the file pointed in C{path}
        is already attached to this record.
        @param path: the file to be checked against.
        @type path: string
        @param f_format: the format of the file; only attached files sharing
            this format are compared.
        @type f_format: string
        @return: True if a file with the requested content is already attached
        to the record.
        @rtype: bool
        """
        size = os.path.getsize(path)
        # Let's consider all the latest files
        files = self.list_latest_files()
        # Let's consider all the latest files with same size
        potential = [afile for afile in files if afile.get_size() == size and afile.format == f_format]
        if potential:
            checksum = calculate_md5(path)
            # Let's consider all the latest files with the same size and the
            # same checksum
            potential = [afile for afile in potential if afile.get_checksum() == checksum]
            if potential:
                # final byte-by-byte comparison to rule out hash collisions
                potential = [afile for afile in potential if \
                             filecmp.cmp(afile.get_full_path(), path)]
                if potential:
                    return True
                else:
                    # Gosh! How unlucky, same size, same checksum but not same
                    # content!
                    pass
        return False
def propose_unique_docname(self, docname):
"""
Given C{docname}, return a new docname that is not already attached to
the record.
@param docname: the reference docname.
@type docname: string
@return: a docname not already attached.
@rtype: string
"""
docname = normalize_docname(docname)
goodname = docname
i = 1
while goodname in self.get_bibdoc_names():
i += 1
goodname = "%s_%s" % (docname, i)
return goodname
    def merge_bibdocs(self, docname1, docname2):
        """
        This method merge C{docname2} into C{docname1}.
            1. Given all the formats of the latest version of the files
               attached to C{docname2}, these files are added as new formats
               into C{docname1}.
            2. C{docname2} is marked as deleted.
        @raise InvenioBibDocFileError: if at least one format in C{docname2}
            already exists in C{docname1}. (In this case the two bibdocs are
            preserved)
        @note: comments and descriptions are also copied.
        @note: if C{docname2} has a I{restriction}(i.e. if the I{status} is
            set) and C{docname1} doesn't, the restriction is imported.
        """
        bibdoc1 = self.get_bibdoc(docname1)
        bibdoc2 = self.get_bibdoc(docname2)
        ## Check for possibility
        # fail early, before any state is modified, so both bibdocs survive
        for bibdocfile in bibdoc2.list_latest_files():
            docformat = bibdocfile.get_format()
            if bibdoc1.format_already_exists_p(docformat):
                raise InvenioBibDocFileError('Format %s already exists in bibdoc %s of record %s. It\'s impossible to merge bibdoc %s into it.' % (docformat, docname1, self.id, docname2))
        ## Importing restriction if needed.
        restriction1 = bibdoc1.get_status()
        restriction2 = bibdoc2.get_status()
        if restriction2 and not restriction1:
            bibdoc1.set_status(restriction2)
        ## Importing formats
        for bibdocfile in bibdoc2.list_latest_files():
            docformat = bibdocfile.get_format()
            comment = bibdocfile.get_comment()
            description = bibdocfile.get_description()
            bibdoc1.add_file_new_format(bibdocfile.get_full_path(),
                                        description=description,
                                        comment=comment, docformat=docformat)
        ## Finally deleting old bibdoc2
        bibdoc2.delete()
        self.build_bibdoc_list()
    def get_docid(self, docname):
        """
        @param docname: the document name.
        @type docname: string
        @return: the identifier corresponding to the given C{docname}.
        @rtype: integer
        @raise InvenioBibDocFileError: if the C{docname} does not
            corresponds to a document attached to this record.
        """
        if docname in self.bibdocs:
            # each entry is a (BibDoc, attachment type) pair
            return self.bibdocs[docname][0].id
        raise InvenioBibDocFileError, "Recid '%s' is not connected with a " \
            "docname '%s'" % (self.id, docname)
    def get_docname(self, docid):
        """
        @param docid: the document identifier.
        @type docid: integer
        @return: the name of the document corresponding to the given document
            identifier.
        @rtype: string
        @raise InvenioBibDocFileError: if the C{docid} does not
            corresponds to a document attached to this record.
        """
        # linear scan: the mapping is keyed by docname, not by docid
        for (docname, (bibdoc, _)) in self.bibdocs.items():
            if bibdoc.id == docid:
                return docname
        raise InvenioBibDocFileError, "Recid '%s' is not connected with a " \
            "docid '%s'" % (self.id, docid)
    def change_name(self, newname, oldname=None, docid=None):
        """
        Renames document of a given name.

        Either C{oldname} or C{docid} must be provided to identify the
        document to rename.
        @param newname: the new name.
        @type newname: string
        @param oldname: the current name of the document.
        @type oldname: string
        @param docid: the identifier of the document.
        @type docid: integer
        @raise InvenioBibDocFileError: if the new name corresponds to
            a document already attached to the record owning this document.
        """
        if not oldname and not docid:
            raise StandardError("Trying to rename unspecified document")
        if not oldname:
            oldname = self.get_docname(docid)
        if not docid:
            docid = self.get_docid(oldname)
        doc, atttype = self.bibdocs[oldname]
        try:
            newname = normalize_docname(newname)
            res = run_sql("SELECT id_bibdoc FROM bibrec_bibdoc WHERE id_bibrec=%s AND docname=%s", (self.id, newname))
            if res:
                raise InvenioBibDocFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.id)
            run_sql("update bibrec_bibdoc set docname=%s where id_bibdoc=%s and id_bibrec=%s", (newname, docid, self.id))
        finally:
            # NOTE(review): this finally-block runs even when the SQL update
            # above raised, so the in-memory structures may be renamed while
            # the database was not -- confirm whether this is intentional.
            # updating the document
            for a in doc.bibrec_links:
                if a["recid"] == self.id:
                    a["docname"] = newname
            # updating the record structure
            del self.bibdocs[oldname]
            self.bibdocs[newname] = (doc, atttype)
def has_docname_p(self, docname):
"""
@param docname: the document name,
@type docname: string
@return: True if a document with the given name is attached to this
record.
@rtype: bool
"""
return docname in self.bibdocs.keys()
    def get_bibdoc(self, docname):
        """
        @param docname: the document name.
        @type docname: string
        @return: the bibdoc with a particular docname associated with
            this recid.
        @rtype: BibDoc
        @raise InvenioBibDocFileError: if no document with that name is
            attached to this record.
        """
        if docname in self.bibdocs:
            return self.bibdocs[docname][0]
        raise InvenioBibDocFileError, "Recid '%s' is not connected with " \
            " docname '%s'" % (self.id, docname)
    def delete_bibdoc(self, docname):
        """
        Deletes the document with the specified I{docname}.
        Unknown docnames are silently ignored.
        @param docname: the document name.
        @type docname: string
        """
        if docname in self.bibdocs:
            self.bibdocs[docname][0].delete()
        # refresh the in-memory mapping from the database
        self.build_bibdoc_list()
    def add_bibdoc(self, doctype="Main", docname='file', never_fail=False):
        """
        Add a new empty document object (a I{bibdoc}) to the list of
        documents of this record.
        @param doctype: the document type.
        @type doctype: string
        @param docname: the document name.
        @type docname: string
        @param never_fail: if True, this procedure will not fail, even if
            a document with the given name is already attached to this
            record. In this case a new name will be generated (see
            L{propose_unique_docname}).
        @type never_fail: bool
        @return: the newly created document object.
        @rtype: BibDoc
        @raise InvenioBibDocFileError: in case of any error.
        """
        try:
            docname = normalize_docname(docname)
            if never_fail:
                docname = self.propose_unique_docname(docname)
            if docname in self.get_bibdoc_names():
                raise InvenioBibDocFileError, \
                    "%s has already a bibdoc with docname %s" % (self.id, docname)
            else:
                bibdoc = BibDoc.create_instance(recid=self.id, doctype=doctype,
                                                docname=docname,
                                                human_readable=self.human_readable)
                self.build_bibdoc_list()
                return bibdoc
        except Exception, e:
            # any failure (including the name clash above) is reported
            # uniformly as InvenioBibDocFileError
            register_exception()
            raise InvenioBibDocFileError(str(e))
    def add_new_file(self, fullpath, doctype="Main", docname=None,
                     never_fail=False, description=None, comment=None,
                     docformat=None, flags=None, modification_date=None):
        """
        Directly add a new file to this record.

        Adds a new file with the following policy:
            - if the C{docname} is not set it is retrieved from the name of the
              file.
            - If a bibdoc with the given docname doesn't already exist, it is
              created and the file is added to it.
            - It it exist but it doesn't contain the format that is being
              added, the new format is added.
            - If the format already exists then if C{never_fail} is True a new
              bibdoc is created with a similar name but with a progressive
              number as a suffix and the file is added to it (see
              L{propose_unique_docname}).
        @param fullpath: the filesystme path of the document to be added.
        @type fullpath: string
        @param doctype: the type of the document.
        @type doctype: string
        @param docname: the document name.
        @type docname: string
        @param never_fail: if True, this procedure will not fail, even if
            a document with the given name is already attached to this
            record. In this case a new name will be generated (see
            L{propose_unique_docname}).
        @type never_fail: bool
        @param description: an optional description of the file.
        @type description: string
        @param comment: an optional comment to the file.
        @type comment: string
        @param docformat: the extension of the file. If not specified it will
            be guessed (see L{decompose_file}).
        @type docformat: string
        @param flags: a set of flags to be associated with the file (see
            L{CFG_BIBDOCFILE_AVAILABLE_FLAGS})
        @type flags: list of string
        @param modification_date: the optional modification date to record
            for the file.
        @return: the elaborated document object.
        @rtype: BibDoc
        @raise InvenioBibDocFileError: in case of error.
        """
        if docname is None:
            docname = decompose_file(fullpath)[1]
        if docformat is None:
            docformat = decompose_file(fullpath)[2]
        docname = normalize_docname(docname)
        try:
            bibdoc = self.get_bibdoc(docname)
        except InvenioBibDocFileError:
            # bibdoc doesn't already exists!
            bibdoc = self.add_bibdoc(doctype, docname, False)
            bibdoc.add_file_new_version(fullpath, description=description, comment=comment, docformat=docformat, flags=flags, modification_date=modification_date)
            self.build_bibdoc_list()
        else:
            try:
                bibdoc.add_file_new_format(fullpath, description=description, comment=comment, docformat=docformat, flags=flags, modification_date=modification_date)
                self.build_bibdoc_list()
            except InvenioBibDocFileError, dummy:
                # Format already exist!
                if never_fail:
                    bibdoc = self.add_bibdoc(doctype, docname, True)
                    bibdoc.add_file_new_version(fullpath, description=description, comment=comment, docformat=docformat, flags=flags, modification_date=modification_date)
                    self.build_bibdoc_list()
                else:
                    raise
        return bibdoc
    def add_new_version(self, fullpath, docname=None, description=None, comment=None, docformat=None, flags=None):
        """
        Adds a new file to an already existent document object as a new
        version.
        @param fullpath: the filesystem path of the file to be added.
        @type fullpath: string
        @param docname: the document name. If not specified it will be
            extracted from C{fullpath} (see L{decompose_file}).
        @type docname: string
        @param description: an optional description for the file.
        @type description: string
        @param comment: an optional comment to the file.
        @type comment: string
        @param docformat: the extension of the file. If not specified it will
            be guessed (see L{decompose_file}).
        @type docformat: string
        @param flags: a set of flags to be associated with the file (see
            L{CFG_BIBDOCFILE_AVAILABLE_FLAGS})
        @type flags: list of string
        @return: the elaborated document object.
        @rtype: BibDoc
        @raise InvenioBibDocFileError: in case of error.
        @note: previous files associated with the same document will be
            considered obsolete.
        """
        if docname is None:
            docname = decompose_file(fullpath)[1]
        if docformat is None:
            docformat = decompose_file(fullpath)[2]
        if flags is None:
            flags = []
        # a 'pdfa' subformat implicitly marks the file as PDF/A
        if 'pdfa' in get_subformat_from_format(docformat).split(';') and not 'PDF/A' in flags:
            flags.append('PDF/A')
        bibdoc = self.get_bibdoc(docname=docname)
        bibdoc.add_file_new_version(fullpath, description=description, comment=comment, docformat=docformat, flags=flags)
        self.build_bibdoc_list()
        return bibdoc
    def add_new_format(self, fullpath, docname=None, description=None, comment=None, docformat=None, flags=None, modification_date=None):
        """
        Adds a new file to an already existent document object as a new
        format.
        @param fullpath: the filesystem path of the file to be added.
        @type fullpath: string
        @param docname: the document name. If not specified it will be
            extracted from C{fullpath} (see L{decompose_file}).
        @type docname: string
        @param description: an optional description for the file.
        @type description: string
        @param comment: an optional comment to the file.
        @type comment: string
        @param docformat: the extension of the file. If not specified it will
            be guessed (see L{decompose_file}).
        @type docformat: string
        @param flags: a set of flags to be associated with the file (see
            L{CFG_BIBDOCFILE_AVAILABLE_FLAGS})
        @type flags: list of string
        @param modification_date: the optional modification date to record
            for the file.
        @return: the elaborated document object.
        @rtype: BibDoc
        @raise InvenioBibDocFileError: in case the same format already
            exists.
        """
        if docname is None:
            docname = decompose_file(fullpath)[1]
        if docformat is None:
            docformat = decompose_file(fullpath)[2]
        if flags is None:
            flags = []
        # a 'pdfa' subformat implicitly marks the file as PDF/A
        if 'pdfa' in get_subformat_from_format(docformat).split(';') and not 'PDF/A' in flags:
            flags.append('PDF/A')
        bibdoc = self.get_bibdoc(docname=docname)
        bibdoc.add_file_new_format(fullpath, description=description, comment=comment, docformat=docformat, flags=flags, modification_date=modification_date)
        self.build_bibdoc_list()
        return bibdoc
def list_latest_files(self, doctype='', list_hidden=True):
"""
Returns a list of the latest files.
@param doctype: if set, only document of the given type will be listed.
@type doctype: string
@param list_hidden: if True, will list also files with the C{HIDDEN}
flag being set.
@type list_hidden: bool
@return: the list of latest files.
@rtype: list of BibDocFile
"""
docfiles = []
for bibdoc in self.list_bibdocs(doctype):
docfiles += bibdoc.list_latest_files(list_hidden=list_hidden)
return docfiles
    def fix(self, docname):
        """
        Algorithm that transform a broken/old bibdoc into a coherent one.
        Think of it as being the fsck of BibDocs.
            - All the files in the bibdoc directory will be renamed according
              to the document name. Proper .recid, .type, .md5 files will be
              created/updated.
            - In case of more than one file with the same format version a new
              bibdoc will be created in order to put does files.
        @param docname: the document name that need to be fixed.
        @type docname: string
        @return: the list of newly created bibdocs if any.
        @rtype: list of BibDoc
        @raise InvenioBibDocFileError: in case of issues that can not be
            fixed automatically.
        """
        bibdoc = self.get_bibdoc(docname)
        versions = {}
        res = []
        new_bibdocs = [] # List of files with the same version/format of
                         # existing file which need new bibdoc.
        counter = 0
        zero_version_bug = False
        if os.path.exists(bibdoc.basedir):
            # Pass 1: rename every stored file to a temporary FIXING- name,
            # indexing it by (version, format) to detect duplicates.
            for filename in os.listdir(bibdoc.basedir):
                if filename[0] != '.' and ';' in filename:
                    name, version = filename.split(';')
                    try:
                        version = int(version)
                    except ValueError:
                        # Strange name
                        register_exception()
                        raise InvenioBibDocFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir)
                    if version == 0:
                        # legacy bug: versions used to start at 0; shift by one
                        zero_version_bug = True
                    docformat = name[len(file_strip_ext(name)):]
                    docformat = normalize_format(docformat)
                    if not versions.has_key(version):
                        versions[version] = {}
                    new_name = 'FIXING-%s-%s' % (str(counter), name)
                    try:
                        shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name))
                    except Exception, e:
                        register_exception()
                        raise InvenioBibDocFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e)
                    if versions[version].has_key(docformat):
                        # duplicate (version, format): will go to a new bibdoc
                        new_bibdocs.append((new_name, version))
                    else:
                        versions[version][docformat] = new_name
                    counter += 1
                elif filename[0] != '.':
                    # Strange name
                    register_exception()
                    raise InvenioBibDocFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir)
        else:
            # we create the corresponding storage directory
            old_umask = os.umask(022)
            os.makedirs(bibdoc.basedir)
            # and save the father record id if it exists
            try:
                if self.id != "":
                    recid_fd = open("%s/.recid" % bibdoc.basedir, "w")
                    recid_fd.write(str(self.id))
                    recid_fd.close()
                if bibdoc.doctype != "":
                    type_fd = open("%s/.type" % bibdoc.basedir, "w")
                    type_fd.write(str(bibdoc.doctype))
                    type_fd.close()
            except Exception, e:
                register_exception()
                raise InvenioBibDocFileError, e
            os.umask(old_umask)
        if not versions:
            # nothing stored at all: drop the empty bibdoc
            bibdoc.delete()
        else:
            # Pass 2: rename the temporary files to their canonical
            # docname + format + ';' + version names.
            for version, formats in versions.iteritems():
                if zero_version_bug:
                    version += 1
                for docformat, filename in formats.iteritems():
                    destination = '%s%s;%i' % (docname, docformat, version)
                    try:
                        shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination))
                    except Exception, e:
                        register_exception()
                        raise InvenioBibDocFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e)
            try:
                recid_fd = open("%s/.recid" % bibdoc.basedir, "w")
                recid_fd.write(str(self.id))
                recid_fd.close()
                type_fd = open("%s/.type" % bibdoc.basedir, "w")
                type_fd.write(str(bibdoc.doctype))
                type_fd.close()
            except Exception, e:
                register_exception()
                raise InvenioBibDocFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e)
            self.build_bibdoc_list()
            res = []
            # Pass 3: move duplicate (version, format) files into brand new
            # bibdocs with derived names.
            for (filename, version) in new_bibdocs:
                if zero_version_bug:
                    version += 1
                new_bibdoc = self.add_bibdoc(doctype=bibdoc.doctype, docname=docname, never_fail=True)
                new_bibdoc.add_file_new_format('%s/%s' % (bibdoc.basedir, filename), version)
                res.append(new_bibdoc)
                try:
                    os.remove('%s/%s' % (bibdoc.basedir, filename))
                except Exception, e:
                    register_exception()
                    raise InvenioBibDocFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e)
            Md5Folder(bibdoc.basedir).update(only_new=False)
            bibdoc._build_file_list()
            self.build_bibdoc_list()
            for (bibdoc, dummyatttype) in self.bibdocs.values():
                if not run_sql('SELECT data_value FROM bibdocmoreinfo WHERE bibdocid=%s', (bibdoc.id,)):
                    ## Import from MARC only if the bibdoc has never had
                    ## its more_info initialized.
                    try:
                        bibdoc.import_descriptions_and_comments_from_marc()
                    except Exception, e:
                        register_exception()
                        raise InvenioBibDocFileError, "Error in importing description and comment from %s for record %s: %s" % (repr(bibdoc), self.id, e)
        return res
    def check_format(self, docname):
        """
        Check for any format related issue.
        In case L{CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS} is
        altered or Python version changes, it might happen that a docname
        contains files which are no more docname + .format ; version, simply
        because the .format is now recognized (and it was not before, so
        it was contained into the docname).
        This algorithm verify if it is necessary to fix (seel L{fix_format}).
        @param docname: the document name whose formats should be verified.
        @type docname: string
        @return: True if format is correct. False if a fix is needed.
        @rtype: bool
        @raise InvenioBibDocFileError: in case of any error.
        """
        bibdoc = self.get_bibdoc(docname)
        # a dummy '.pdf' is appended just to see how the docname itself
        # would be decomposed under the current extension rules
        correct_docname = decompose_file(docname + '.pdf')[1]
        if docname != correct_docname:
            return False
        for filename in os.listdir(bibdoc.basedir):
            if not filename.startswith('.'):
                try:
                    dummy, dummy, docformat, version = decompose_file_with_version(filename)
                except Exception:
                    raise InvenioBibDocFileError('Incorrect filename "%s" for docname %s for recid %i' % (filename, docname, self.id))
                if '%s%s;%i' % (correct_docname, docformat, version) != filename:
                    return False
        return True
def check_duplicate_docnames(self):
"""
Check wethever the record is connected with at least tho documents
with the same name.
@return: True if everything is fine.
@rtype: bool
"""
docnames = set()
for docname in self.get_bibdoc_names():
if docname in docnames:
return False
else:
docnames.add(docname)
return True
    def uniformize_bibdoc(self, docname):
        """
        This algorithm correct wrong file name belonging to a bibdoc.
        Every stored file is renamed to the canonical
        C{docname + format + ';' + version} form; the md5 cache is then
        refreshed.
        @param docname: the document name whose formats should be verified.
        @type docname: string
        """
        bibdoc = self.get_bibdoc(docname)
        for filename in os.listdir(bibdoc.basedir):
            if not filename.startswith('.'):
                try:
                    dummy, dummy, docformat, version = decompose_file_with_version(filename)
                except ValueError:
                    # unparsable name: report it but keep processing the rest
                    register_exception(alert_admin=True, prefix= "Strange file '%s' is stored in %s" % (filename, bibdoc.basedir))
                else:
                    os.rename(os.path.join(bibdoc.basedir, filename), os.path.join(bibdoc.basedir, '%s%s;%i' % (docname, docformat, version)))
        Md5Folder(bibdoc.basedir).update()
        bibdoc.touch()
        bibdoc._build_file_list('rename')
    def fix_format(self, docname, skip_check=False):
        """
        Fixes format related inconsistencies.
        @param docname: the document name whose formats should be verified.
        @type docname: string
        @param skip_check: if True assume L{check_format} has already been
            called and the need for fix has already been found.
            If False, will implicitly call L{check_format} and skip fixing
            if no error is found.
        @type skip_check: bool
        @return: False in case merging two bibdocs would be needed but it's
            not possible; True otherwise.
        @rtype: bool
        """
        if not skip_check:
            if self.check_format(docname):
                return True
        bibdoc = self.get_bibdoc(docname)
        # how the docname would be decomposed under current extension rules
        correct_docname = decompose_file(docname + '.pdf')[1]
        need_merge = False
        if correct_docname != docname:
            need_merge = self.has_docname_p(correct_docname)
            if need_merge:
                # the corrected name is taken: rename to a unique name first,
                # then merge into the existing bibdoc
                proposed_docname = self.propose_unique_docname(correct_docname)
                run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (proposed_docname, bibdoc.id))
                self.build_bibdoc_list()
                self.uniformize_bibdoc(proposed_docname)
                try:
                    self.merge_bibdocs(docname, proposed_docname)
                except InvenioBibDocFileError:
                    return False
            else:
                run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (correct_docname, bibdoc.id))
                self.build_bibdoc_list()
                self.uniformize_bibdoc(correct_docname)
        else:
            self.uniformize_bibdoc(docname)
        return True
    def fix_duplicate_docnames(self, skip_check=False):
        """
        Algotirthm to fix duplicate docnames.
        If a record is connected with at least two bibdoc having the same
        docname, the algorithm will try to merge them.
        @param skip_check: if True assume L{check_duplicate_docnames} has
            already been called and the need for fix has already been found.
            If False, will implicitly call L{check_duplicate_docnames} and skip
            fixing if no error is found.
        @type skip_check: bool
        """
        if not skip_check:
            if self.check_duplicate_docnames():
                return
        docnames = set()
        for bibdoc in self.list_bibdocs():
            docname = self.get_docname(bibdoc.id)
            if docname in docnames:
                # rename the later duplicate to a unique name, then merge it
                # back into the first document carrying this docname
                new_docname = self.propose_unique_docname(self.get_docname(bibdoc.id))
                self.change_name(docid=bibdoc.id, newname=new_docname)
                self.merge_bibdocs(docname, new_docname)
            docnames.add(docname)
    def get_text(self, extract_text_if_necessary=True):
        """
        @param extract_text_if_necessary: whether to trigger text (and
            possibly OCR) extraction for bibdocs whose text is missing or
            out of date.
        @type extract_text_if_necessary: bool
        @return: concatenated texts of all bibdocs separated by " ": string
        """
        texts = []
        for bibdoc in self.list_bibdocs():
            # only bibdocs exposing the text API participate
            if hasattr(bibdoc, 'has_text'):
                if extract_text_if_necessary and not bibdoc.has_text(require_up_to_date=True):
                    # OCR only for docnames matching the configured pattern
                    re_perform_ocr = re.compile(CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES)
                    perform_ocr = bool(re_perform_ocr.match(bibdoc.get_docname()))
                    from invenio.bibtask import write_message
                    write_message("... will extract words from %s (docid: %s) %s" % (bibdoc.get_docname(), bibdoc.get_id(), perform_ocr and 'with OCR' or ''), verbose=2)
                    bibdoc.extract_text(perform_ocr=perform_ocr)
                texts.append(bibdoc.get_text())
        return " ".join(texts)
class BibDoc(object):
"""
This class represents one document (i.e. a set of files with different
formats and with versioning information) that constitutes a piece of
information.
To instantiate a new document, the recid and the docname are mandatory.
To instantiate an already existing document, either the recid and docname
or the docid alone are sufficient to retrieve it.
@param docid: the document identifier.
@type docid: integer
@param recid: the record identifier of the record to which this document
belongs to. If the C{docid} is specified the C{recid} is automatically
retrieved from the database.
@type recid: integer
@param docname: the document name.
@type docname: string
@param doctype: the document type (used when instanciating a new document).
@type doctype: string
@param human_readable: whether sizes should be represented in a human
readable format.
@type human_readable: bool
@raise InvenioBibDocFileError: in case of error.
"""
@staticmethod
def create_new_document(doc_type="Main", rec_links=None):
    """
    Create a brand new document row and its on-disk directory.

    @param doc_type: the doctype stored in the new bibdoc row.
    @type doc_type: string
    @param rec_links: optional list of attachment descriptors, each a
        dict with keys C{rec_id}, C{doc_name} and C{a_type}; every entry
        with a true C{rec_id} is attached to the new document.
    @type rec_links: list of dict
    @return: the identifier of the newly created document.
    @rtype: integer
    @raise InvenioBibDocFileError: if the row or its directory cannot
        be created.
    """
    if rec_links is None:
        # avoid the shared mutable default argument pitfall
        rec_links = []
    status = ''
    doc_id = run_sql("INSERT INTO bibdoc (status, creation_date, modification_date, doctype) "
                     "values(%s,NOW(),NOW(), %s)", (status, doc_type))
    if not doc_id:
        raise InvenioBibDocFileError("New docid cannot be created")
    # creating the representation on disk ... preparing the directory
    try:
        BibDoc.prepare_basedir(doc_id)
    except Exception as e:
        # roll back the freshly inserted row so no orphan remains
        run_sql('DELETE FROM bibdoc WHERE id=%s', (doc_id, ))
        register_exception(alert_admin=True)
        raise InvenioBibDocFileError(e)
    # the object has been created: linking to bibliographical records
    doc = BibDoc(doc_id)
    for link in rec_links:
        if "rec_id" in link and link["rec_id"]:
            rec_id = link["rec_id"]
            doc_name = normalize_docname(link["doc_name"])
            a_type = link["a_type"]
            doc.attach_to_record(rec_id, str(a_type), str(doc_name))
    return doc_id
def __init__(self, docid, human_readable=False, initial_data=None):
    """Constructor of a bibdoc for an already existing document.

    @param docid: identifier of the existing document.
    @type docid: integer
    @param human_readable: whether sizes should be reported in a human
        readable format.
    @type human_readable: bool
    @param initial_data: optional pre-fetched dictionary as returned by
        L{_retrieve_data}; when None it is fetched from the database.
    @type initial_data: dict
    """
    # docid is known, the document already exists
    res2 = run_sql("SELECT id_bibrec, type, docname FROM bibrec_bibdoc WHERE id_bibdoc=%s", (docid,))
    self.bibrec_types = [(r[0], r[1], r[2]) for r in res2 ] # just in case the result was behaving like tuples but was something else
    if not res2:
        # fake attachment so unattached documents still have one entry
        self.bibrec_types = [(0, None, "fake_name_for_unattached_document")]
    if initial_data is None:
        initial_data = BibDoc._retrieve_data(docid)
    self.docfiles = []
    self.__md5s = None  # lazily populated by get_md5s()
    self.human_readable = human_readable
    self.cd = initial_data["cd"] # creation date
    self.md = initial_data["md"] # modification date
    self.td = initial_data["td"] # text extraction date # should be moved from here !!!!
    self.bibrec_links = initial_data["bibrec_links"]
    self.id = initial_data["id"]
    self.status = initial_data["status"]
    self.basedir = initial_data["basedir"]
    self.doctype = initial_data["doctype"]
    self.storagename = initial_data["storagename"] # the old docname -> now used as a storage name for old records
    self.more_info = BibDocMoreInfo(self.id)
    self._build_file_list('init')
    # link with related_files
    self._build_related_file_list()
@staticmethod
def prepare_basedir(doc_id):
    """Create, if missing, the directory serving as root of a BibDoc."""
    basedir = _make_base_dir(doc_id)
    if os.path.exists(basedir):
        return
    # temporarily relax the umask so the storage directory gets
    # group-writable permissions, then restore the previous value
    saved_umask = os.umask(0o22)
    os.makedirs(basedir)
    os.umask(saved_umask)
def _update_additional_info_files_p(self):
    """
    Rewrite the hidden C{.reclinks} file inside the document directory.

    The file lists, one per line, every record this document is attached
    to, together with the docname and the attachment type.

    @raise InvenioBibDocFileError: if the file cannot be written.
    """
    try:
        # 'with' guarantees the descriptor is closed even if a write
        # fails (the previous code leaked it on error)
        with open("%s/.reclinks" % (self.basedir, ), "w") as reclinks_fd:
            reclinks_fd.write("RECID DOCNAME TYPE\n")
            for link in self.bibrec_links:
                reclinks_fd.write("%(recid)s %(docname)s %(doctype)s\n" % link)
    except Exception as e:
        register_exception(alert_admin=True)
        raise InvenioBibDocFileError(e)
@staticmethod
def _retrieve_data(docid = None):
    """
    Fetch all database information about the document C{docid}.

    @param docid: the document identifier.
    @type docid: integer
    @return: a dictionary with keys C{id}, C{basedir}, C{bibrec_links},
        C{status}, C{cd}, C{md}, C{td}, C{doctype}, C{storagename} and
        C{extensions} (the format extensions found on disk).
    @rtype: dict
    @raise InvenioBibDocFileError: if no bibdoc row exists for C{docid}.
    """
    container = {}
    container["bibrec_links"] = []
    container["id"] = docid
    container["basedir"] = _make_base_dir(container["id"])
    # retrieving links between records and documents
    res = run_sql("SELECT id_bibrec, type, docname FROM bibrec_bibdoc WHERE id_bibdoc=%s", (str(docid),), 1)
    if res:
        for r in res:
            container["bibrec_links"].append({"recid": r[0], "doctype": r[1], "docname": r[2]})
    # gather the other information
    res = run_sql("SELECT status, creation_date, modification_date, text_extraction_date, doctype, docname FROM bibdoc WHERE id=%s LIMIT 1", (docid,), 1)
    if res:
        container["status"] = res[0][0]
        container["cd"] = res[0][1]
        container["md"] = res[0][2]
        container["td"] = res[0][3]
        container["doctype"] = res[0][4]
        container["storagename"] = res[0][5]
    else:
        # this bibdoc doesn't exist
        raise InvenioBibDocFileError, "The docid %s does not exist." % docid
    # retrieving all available formats: everything on disk whose name
    # starts with the storage prefix, keeping only the extension part
    fprefix = container["storagename"] or "content"
    container["extensions"] = [fname[len(fprefix):] for fname in filter(lambda x: x.startswith(fprefix),os.listdir(container["basedir"]))]
    return container
@staticmethod
def create_instance(docid=None, recid=None, docname=None,
                    doctype='Fulltext', a_type = 'Main', human_readable=False):
    """
    Factory returning a BibDoc (or a plugin-provided subclass) instance,
    creating the underlying document first when C{docid} is not given.

    Parameters of an attachement to the record:
    a_type, recid, docname
    @param a_type Type of the attachment to the record (by default Main)
    @type a_type String
    @param doctype Type of the document itself (by default Fulltext)
    @type doctype String
    """
    # first try to retrieve existing record based on obtained data
    data = None
    extensions = []
    if docid != None:
        data = BibDoc._retrieve_data(docid)
        doctype = data["doctype"]
        extensions = data["extensions"]
    # now check if the doctype is supported by any particular plugin
    def plugin_bldr(plugin_code):
        """Preparing the plugin dictionary structure"""
        # only modules named bom_* are treated as bibdoc plugins
        if not plugin_code.__name__.split('.')[-1].startswith('bom_'):
            return
        ret = {}
        ret['create_instance'] = getattr(plugin_code, "create_instance", None)
        ret['supports'] = getattr(plugin_code, "supports", None)
        return ret
    bibdoc_plugins = filter(None, map(
        plugin_bldr, import_submodules_from_packages(
            'bibdocfile_plugins', packages=['invenio'])))
    # Loading an appropriate plugin (by default a generic BibDoc)
    used_plugin = None
    for plugin in bibdoc_plugins:
        if plugin['supports'](doctype, extensions):
            # NOTE(review): no break -- the LAST matching plugin wins
            used_plugin = plugin
    if not docid:
        rec_links = []
        if recid:
            rec_links.append({"rec_id": recid, "doc_name" : docname, "a_type": a_type})
        if used_plugin and 'create_new' in used_plugin:
            # NOTE(review): plugin_bldr never sets a 'create_new' key,
            # so this branch looks unreachable -- confirm against the
            # bibdocfile plugin API before relying on it.
            docid = used_plugin['create_new'](doctype, rec_links)
        else:
            docid = BibDoc.create_new_document(doctype, rec_links)
    if used_plugin:
        return used_plugin['create_instance'](docid=docid,
                                              human_readable=human_readable,
                                              initial_data=data)
    return BibDoc(docid=docid,
                  human_readable=human_readable,
                  initial_data=data)
# parameters can not be passed any more
@staticmethod
def _attach_to_record_p(doc_id, rec_id, a_type, docname):
    """Private core of a method attaching document of a given ID to a record.

    @param doc_id: identifier of the document being attached.
    @param rec_id: identifier of the record attached to.
    @param a_type: attachment type (the function in which the document
        appears in the record).
    @type a_type: string
    @param docname: name of the document inside the record.
    @type docname: string
    """
    run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type, docname) VALUES (%s,%s,%s,%s)",
            (str(rec_id), str(doc_id), a_type, docname))
def attach_to_record(self, recid, a_type, docname):
    """Attach this document to the record given by its identifier.

    @param recid: the identifier of the record.
    @type recid: integer
    @param a_type: function of the document within the record.
    @type a_type: string
    @param docname: name of the document inside the record.
    @type docname: string
    """
    # delegate to the private helper so the linking SQL lives in exactly
    # one place (the previous code duplicated the INSERT statement)
    BibDoc._attach_to_record_p(self.id, recid, a_type, docname)
    self._update_additional_info_files_p()
def __repr__(self):
    """
    @return: the canonical string representation of the C{BibDoc}.
    @rtype: string
    """
    return 'BibDoc(%r, %r, %r)' % (self.id, self.doctype, self.human_readable)
def format_recids(self):
    """Return a printable representation of the attached record ids.

    A single attachment yields the bare recid; several yield a
    bracketed, comma-separated string.
    """
    links = self.bibrec_links
    if len(links) == 1:
        return links[0]["recid"]
    return "[%s]" % ",".join(str(link["recid"]) for link in links)
def __str__(self):
    """
    @return: an easy to be I{grepped} string representation of the
        whole C{BibDoc} content.
    @rtype: string
    """
    recids = self.format_recids()
    head = '%s:%i:::' % (recids, self.id)
    lines = [
        head + 'doctype=%s\n' % self.doctype,
        head + 'status=%s\n' % self.status,
        head + 'basedir=%s\n' % self.basedir,
        head + 'creation date=%s\n' % self.cd,
        head + 'modification date=%s\n' % self.md,
        head + 'text extraction date=%s\n' % self.td,
        head + 'total file attached=%s\n' % len(self.docfiles),
    ]
    # sizes are pretty-printed only when the instance asks for it
    if self.human_readable:
        lines.append(head + 'total size latest version=%s\n' % nice_size(self.get_total_size_latest_version()))
        lines.append(head + 'total size all files=%s\n' % nice_size(self.get_total_size()))
    else:
        lines.append(head + 'total size latest version=%s\n' % self.get_total_size_latest_version())
        lines.append(head + 'total size all files=%s\n' % self.get_total_size())
    lines.extend(str(docfile) for docfile in self.docfiles)
    return ''.join(lines)
def get_md5s(self):
    """
    @return: an instance of the Md5Folder class to access MD5 information
        of the current BibDoc
    @rtype: Md5Folder
    """
    # lazily created on first access and cached afterwards
    if self.__md5s is None:
        self.__md5s = Md5Folder(self.basedir)
    return self.__md5s
# read-only accessor exposing the lazily built Md5Folder
md5s = property(get_md5s)
def format_already_exists_p(self, docformat):
    """
    Tell whether a file of the given format is already present among
    the latest files of this document.

    @param docformat: a format to be checked.
    @type docformat: string
    @return: True if such a file already exists.
    @rtype: bool
    """
    wanted = normalize_format(docformat)
    return any(afile.get_format() == wanted
               for afile in self.list_latest_files())
def get_status(self):
    """
    @return: the status information (empty string means the document
        is unrestricted).
    @rtype: string
    """
    return self.status
@staticmethod
def get_fileprefix(basedir, storagename=None):
    """Return the common path prefix of the files of a document stored
    under C{basedir}; the legacy storage name defaults to 'content'."""
    return os.path.join(basedir, storagename or "content")
def get_filepath(self, docformat, version):
    """Build the filesystem path storing a given format/version of the
    document.

    @param docformat: the format of the document.
    @type docformat: string
    @param version: the version of the document.
    @type version: integer
    @return: full path to the file encoding that version and format.
    @rtype: string

    TODO: this should be completely replaced. File storage (and so,
    also path building) should be abstracted from BibDoc and be using
    loadable extensions.
    """
    prefix = BibDoc.get_fileprefix(self.basedir, self.storagename)
    return "%s%s;%i" % (prefix, docformat, version)
def get_docname(self):
    """Obsolete: return the legacy storage name (empty/None for
    new-format documents, whose docnames live in bibrec_bibdoc)."""
    return self.storagename
def get_doctype(self, recid):
    """Retrieve the attachment type of this document within a record.

    @param recid: the record identifier.
    @return: the attachment type of the first matching link, or the
        empty string when the document is not attached to the record.
    @rtype: string
    """
    for link in self.bibrec_links:
        if str(link["recid"]) == str(recid):
            return link["doctype"]
    return ""
def touch(self):
    """
    Bump the modification time of the bibdoc (as in the UNIX command
    C{touch}).
    """
    run_sql('UPDATE bibdoc SET modification_date=NOW() WHERE id=%s', (self.id, ))
def change_doctype(self, new_doctype):
    """
    Change the doctype of this BibDoc, both in the bibdoc row itself
    and in every record-attachment row.

    @param new_doctype: the new document type.
    @type new_doctype: string
    """
    for statement in ('UPDATE bibdoc SET doctype=%s WHERE id=%s',
                      'UPDATE bibrec_bibdoc SET type=%s WHERE id_bibdoc=%s'):
        run_sql(statement, (new_doctype, self.id))
def set_status(self, new_status):
    """
    Set a new status. A document with a status information is a restricted
    document that can be accessed only by users holding an authorization
    for the I{viewrestrdoc} WebAccess action with keyword status set to
    C{new_status}.

    @param new_status: the new status. If empty the document will be
        unrestricted.
    @type new_status: string
    @raise InvenioBibDocFileError: in case the reserved word
        'DELETED' is used.
    """
    if new_status == KEEP_OLD_VALUE:
        # sentinel meaning "leave the current status untouched"
        return
    if new_status == 'DELETED':
        raise InvenioBibDocFileError('DELETED is a reserved word and can not be used for setting the status')
    run_sql('UPDATE bibdoc SET status=%s WHERE id=%s', (new_status, self.id))
    self.status = new_status
    self.touch()
    self._build_file_list()
def add_file_new_version(self, filename, description=None, comment=None, docformat=None, flags=None, modification_date=None):
    """
    Add a new version of a file. If no physical file is already attached
    to the document the given file will have version 1. Otherwise the
    new file will have the current version number plus one.

    @param filename: the local path of the file.
    @type filename: string
    @param description: an optional description for the file.
    @type description: string
    @param comment: an optional comment to the file.
    @type comment: string
    @param docformat: the extension of the file. If not specified it will
        be retrieved from the filename (see L{decompose_file}).
    @type docformat: string
    @param flags: a set of flags to be associated with the file (see
        L{CFG_BIBDOCFILE_AVAILABLE_FLAGS})
    @type flags: list of string
    @param modification_date: if given, forced modification time of the
        stored file.
    @raise InvenioBibDocFileError: in case of error.
    """
    try:
        latestVersion = self.get_latest_version()
        if latestVersion == 0:
            # first file ever attached to this document
            myversion = 1
        else:
            myversion = latestVersion + 1
        if os.path.exists(filename):
            if not os.path.getsize(filename) > 0:
                raise InvenioBibDocFileError("%s seems to be empty" % filename)
            if docformat is None:
                docformat = decompose_file(filename)[2]
            else:
                docformat = normalize_format(docformat)
            destination = self.get_filepath(docformat, myversion)
            if run_sql("SELECT id_bibdoc FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, myversion, docformat)):
                raise InvenioBibDocFileError("According to the database a file of format %s is already attached to the docid %s" % (docformat, self.id))
            try:
                shutil.copyfile(filename, destination)
                os.chmod(destination, 0o644)
                if modification_date: # if the modification time of the file needs to be changed
                    update_modification_date_of_file(destination, modification_date)
            except Exception as e:
                register_exception()
                raise InvenioBibDocFileError("Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e))
            self.more_info.set_description(description, docformat, myversion)
            self.more_info.set_comment(comment, docformat, myversion)
            if flags is None:
                flags = []
            if 'pdfa' in get_subformat_from_format(docformat).split(';') and not 'PDF/A' in flags:
                # a ;pdfa subformat implies the PDF/A flag
                flags.append('PDF/A')
            for flag in flags:
                if flag == 'PERFORM_HIDE_PREVIOUS':
                    # hide every file belonging to an OLDER version.
                    # BUGFIX: the flag must be set on the old file's own
                    # version (it used to be set on the new version,
                    # leaving previous files visible); dedicated loop
                    # variables also avoid clobbering 'docformat', which
                    # is still needed after the try/finally below.
                    for afile in self.list_all_files():
                        old_format = afile.get_format()
                        old_version = afile.get_version()
                        if old_version < myversion:
                            self.more_info.set_flag('HIDDEN', old_format, old_version)
                else:
                    self.more_info.set_flag(flag, docformat, myversion)
        else:
            raise InvenioBibDocFileError("'%s' does not exists!" % filename)
    finally:
        self.touch()
        Md5Folder(self.basedir).update()
        self._build_file_list()
    # on success, mirror the new file into bibdocfsinfo and demote the
    # previous versions' last_version flag
    just_added_file = self.get_file(docformat, myversion)
    run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, true, %s, %s, %s, %s, %s)", (self.id, myversion, docformat, just_added_file.cd, just_added_file.md, just_added_file.get_checksum(), just_added_file.get_size(), just_added_file.mime))
    run_sql("UPDATE bibdocfsinfo SET last_version=false WHERE id_bibdoc=%s AND version<%s", (self.id, myversion))
def add_file_new_format(self, filename, version=None, description=None, comment=None, docformat=None, flags=None, modification_date=None):
    """
    Add a file as a new format.

    @param filename: the local path of the file.
    @type filename: string
    @param version: an optional specific version to which the new format
        should be added. If None, the last version will be used.
    @type version: integer
    @param description: an optional description for the file.
    @type description: string
    @param comment: an optional comment to the file.
    @type comment: string
    @param docformat: the extension of the file. If not specified it will
        be retrieved from the filename (see L{decompose_file}).
    @type docformat: string
    @param flags: a set of flags to be associated with the file (see
        L{CFG_BIBDOCFILE_AVAILABLE_FLAGS})
    @type flags: list of string
    @param modification_date: if given, forced modification time of the
        stored file.
    @raise InvenioBibDocFileError: if the given format already exists.
    """
    try:
        if version is None:
            version = self.get_latest_version()
        if version == 0:
            # no file attached yet: the new format opens version 1
            version = 1
        if os.path.exists(filename):
            if not os.path.getsize(filename) > 0:
                raise InvenioBibDocFileError, "%s seems to be empty" % filename
            if docformat is None:
                docformat = decompose_file(filename)[2]
            else:
                docformat = normalize_format(docformat)
            # refuse duplicates, both in the database ...
            if run_sql("SELECT id_bibdoc FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, version, docformat)):
                raise InvenioBibDocFileError("According to the database a file of format %s is already attached to the docid %s" % (docformat, self.id))
            destination = self.get_filepath(docformat, version)
            # ... and on disk
            if os.path.exists(destination):
                raise InvenioBibDocFileError, "A file for docid '%s' already exists for the format '%s'" % (str(self.id), docformat)
            try:
                shutil.copyfile(filename, destination)
                os.chmod(destination, 0644)
                if modification_date: # if the modification time of the file needs to be changed
                    update_modification_date_of_file(destination, modification_date)
            except Exception, e:
                register_exception()
                raise InvenioBibDocFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e)
            self.more_info.set_comment(comment, docformat, version)
            self.more_info.set_description(description, docformat, version)
            if flags is None:
                flags = []
            if 'pdfa' in get_subformat_from_format(docformat).split(';') and not 'PDF/A' in flags:
                # a ;pdfa subformat implies the PDF/A flag
                flags.append('PDF/A')
            for flag in flags:
                if flag != 'PERFORM_HIDE_PREVIOUS':
                    # PERFORM_HIDE_PREVIOUS is only meaningful when
                    # adding a new version, not a new format
                    self.more_info.set_flag(flag, docformat, version)
        else:
            raise InvenioBibDocFileError, "'%s' does not exists!" % filename
    finally:
        Md5Folder(self.basedir).update()
        self.touch()
        self._build_file_list()
    # on success, mirror the new file into bibdocfsinfo
    just_added_file = self.get_file(docformat, version)
    run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, true, %s, %s, %s, %s, %s)", (self.id, version, docformat, just_added_file.cd, just_added_file.md, just_added_file.get_checksum(), just_added_file.get_size(), just_added_file.mime))
def change_docformat(self, oldformat, newformat):
    """
    Renames a format name on disk and in all BibDoc structures.
    The change will touch only the last version files.
    The change will take place only if the newformat doesn't already exist.

    @param oldformat: the format that needs to be renamed
    @type oldformat: string
    @param newformat: the format new name
    @type newformat: string
    """
    oldformat = normalize_format(oldformat)
    newformat = normalize_format(newformat)
    if self.format_already_exists_p(newformat):
        # same format already exists in the latest files, abort
        return
    for bibdocfile in self.list_latest_files():
        if bibdocfile.get_format() == oldformat:
            # change format -> rename x.oldformat -> x.newformat
            dirname, base, docformat, version = decompose_file_with_version(bibdocfile.get_full_path())
            os.rename(bibdocfile.get_full_path(), os.path.join(dirname, '%s%s;%i' %(base, newformat, version)))
            # refresh checksums and cached structures, then persist
            Md5Folder(self.basedir).update()
            self.touch()
            self._build_file_list('rename')
            self._sync_to_db()
            # at most one latest file per format: nothing more to do
            return
def purge(self):
    """
    Physically removes all the previous version of the given bibdoc.
    Everything but the last formats will be erased.
    """
    version = self.get_latest_version()
    if version > 1:
        for afile in self.docfiles:
            if afile.get_version() < version:
                # wipe the associated metadata first ...
                self.more_info.unset_comment(afile.get_format(), afile.get_version())
                self.more_info.unset_description(afile.get_format(), afile.get_version())
                for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
                    self.more_info.unset_flag(flag, afile.get_format(), afile.get_version())
                # ... then the file itself (best-effort: log and continue)
                try:
                    os.remove(afile.get_full_path())
                except Exception, dummy:
                    register_exception()
        # refresh checksums and caches, then drop the old DB rows
        Md5Folder(self.basedir).update()
        self.touch()
        self._build_file_list()
        run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version<%s", (self.id, version))
def expunge(self):
    """
    Physically remove all the traces of a given document.

    @note: an expunged BibDoc object shouldn't be used anymore or the
        result might be unpredicted.
    """
    del self.__md5s
    self.more_info.delete()
    del self.more_info
    # basedir is shell-escaped before being interpolated in the command
    os.system('rm -rf %s' % escape_shell_arg(self.basedir))
    run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, ))
    run_sql('DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s OR id_bibdoc2=%s', (self.id, self.id))
    run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, ))
    # keep an audit trail of the expunction in the history table
    run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, doctimestamp) VALUES("EXPUNGE", %s, NOW())', (self.id, ))
    run_sql('DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s', (self.id, ))
    # drop the attributes so any further use of the object fails fast
    del self.docfiles
    del self.id
    del self.cd
    del self.md
    del self.td
    del self.basedir
    del self.doctype
    del self.bibrec_links
def revert(self, version):
    """
    Revert the document to a given version. All the formats corresponding
    to that version are copied forward to a new version.

    @param version: the version to revert to.
    @type version: integer
    @raise InvenioBibDocFileError: in case of errors
    """
    version = int(version)
    docfiles = self.list_version_files(version)
    if docfiles:
        # the first file opens the new version ...
        self.add_file_new_version(docfiles[0].get_full_path(), description=docfiles[0].get_description(), comment=docfiles[0].get_comment(), docformat=docfiles[0].get_format(), flags=docfiles[0].flags)
        # ... the remaining ones are added to it as extra formats
        for docfile in docfiles[1:]:
            self.add_file_new_format(docfile.filename, description=docfile.get_description(), comment=docfile.get_comment(), docformat=docfile.get_format(), flags=docfile.flags)
def import_descriptions_and_comments_from_marc(self, record=None):
    """
    Import descriptions and comments from the corresponding MARC metadata.

    @param record: the record (if None it will be calculated).
    @type record: bibrecord recstruct
    @note: If record is passed it is directly used, otherwise it is retrieved
        from the MARCXML stored in the database.
    """
    ## Let's get the record
    from invenio.search_engine import get_record
    if record is None:
        # NOTE(review): self.id is a *document* id, yet get_record() and
        # BibRecDocs() below receive it as if it were a record id --
        # verify against the callers of this method.
        record = get_record(self.id)
    fields = record_get_field_instances(record, '856', '4', ' ')
    global_comment = None
    global_description = None
    local_comment = {}
    local_description = {}
    for field in fields:
        url = field_get_subfield_values(field, 'u')
        if url:
            ## Given a url
            url = url[0]
            if re.match('%s/%s/[0-9]+/files/' % (CFG_SITE_URL, CFG_SITE_RECORD), url):
                ## If it is a traditional /CFG_SITE_RECORD/1/files/ one
                ## We have global description/comment for all the formats
                description = field_get_subfield_values(field, 'y')
                if description:
                    global_description = description[0]
                comment = field_get_subfield_values(field, 'z')
                if comment:
                    global_comment = comment[0]
            elif bibdocfile_url_p(url):
                ## Otherwise we have description/comment per format
                dummy, docname, docformat = decompose_bibdocfile_url(url)
                brd = BibRecDocs(self.id)
                if docname == brd.get_docname(self.id):
                    description = field_get_subfield_values(field, 'y')
                    if description:
                        local_description[docformat] = description[0]
                    comment = field_get_subfield_values(field, 'z')
                    if comment:
                        local_comment[docformat] = comment[0]
    ## Let's update the tables
    version = self.get_latest_version()
    for docfile in self.list_latest_files():
        docformat = docfile.get_format()
        # per-format values take precedence over the global ones
        if docformat in local_comment:
            self.set_comment(local_comment[docformat], docformat, version)
        else:
            self.set_comment(global_comment, docformat, version)
        if docformat in local_description:
            self.set_description(local_description[docformat], docformat, version)
        else:
            self.set_description(global_description, docformat, version)
    self._build_file_list('init')
def get_icon(self, subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE, display_hidden=True):
    """
    @param subformat_re: by default the convention is that
        L{CFG_BIBDOCFILE_ICON_SUBFORMAT_RE} is used as a subformat
        indicator to mean that a particular format is to be used as an
        icon. Specify a different subformat if you need a different
        convention.
    @type subformat_re: compiled regular expression
    @param display_hidden: whether hidden files are also considered.
    @type display_hidden: bool
    @return: the bibdocfile corresponding to the icon of this document,
        or None if no icon exists for this document.
    @rtype: BibDocFile
    @warning: before I{subformat} were introduced this method was
        returning a BibDoc, while now is returning a BibDocFile. Check
        if your client code is compatible with this.
    """
    # first latest-version file whose subformat marks it as an icon
    return next((docfile
                 for docfile in self.list_latest_files(list_hidden=display_hidden)
                 if subformat_re.match(docfile.get_subformat())),
                None)
def add_icon(self, filename, docformat=None, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT, modification_date=None):
    """
    Attach an icon to this document.

    @param filename: the local filesystem path to the icon.
    @type filename: string
    @param docformat: an optional format for the icon; when missing it
        is derived from the filename.
    @type docformat: string
    @param subformat: by default the convention is that
        CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT is used as a subformat
        indicator to mean that a particular format is to be used as an
        icon. Specify a different subformat if you need a different
        convention.
    @type subformat: string
    @param modification_date: if given, forced modification time of the
        stored file.
    @raise InvenioBibDocFileError: in case of errors.
    """
    icon_format = docformat or decompose_file(filename)[2]
    if subformat:
        # the subformat is what marks the file as an icon
        icon_format = "%s;%s" % (icon_format, subformat)
    self.add_file_new_format(filename, docformat=icon_format, modification_date=modification_date)
def delete_icon(self, subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE):
    """
    Remove the icon(s) attached to the document, if any.

    @param subformat_re: by default the convention is that
        L{CFG_BIBDOCFILE_ICON_SUBFORMAT_RE} is used as a subformat
        indicator to mean that a particular format is to be used as an
        icon. Specify a different subformat if you need a different
        convention.
    @type subformat_re: compiled regular expression
    """
    icons = [docfile for docfile in self.list_latest_files()
             if subformat_re.match(docfile.get_subformat())]
    for icon in icons:
        self.delete_file(icon.get_format(), icon.get_version())
def change_name(self, recid, newname):
    """
    Rename this document within a given record.

    @param recid: the record the docname change applies to.
    @type recid: integer
    @param newname: the new name.
    @type newname: string
    @raise InvenioBibDocFileError: if the new name corresponds to
        a document already attached to the record owning this document.
    """
    try:
        newname = normalize_docname(newname)
        # refuse to steal a docname already used within the record
        if run_sql("SELECT id_bibdoc FROM bibrec_bibdoc WHERE id_bibrec=%s AND docname=%s", (recid, newname)):
            raise InvenioBibDocFileError("A bibdoc called %s already exists for recid %s" % (newname, recid))
        run_sql("update bibrec_bibdoc set docname=%s where id_bibdoc=%s and id_bibrec=%s", (newname, self.id, recid))
    finally:
        self.touch()
def set_comment(self, comment, docformat, version=None):
    """
    Update the comment of a given format/version of the document.

    @param comment: the new comment.
    @type comment: string
    @param docformat: the format whose comment is updated.
    @type docformat: string
    @param version: the version whose comment is updated; defaults to
        the latest version.
    @type version: integer
    """
    if version is None:
        version = self.get_latest_version()
    self.more_info.set_comment(comment, normalize_format(docformat), version)
    self.touch()
    self._build_file_list('init')
def set_description(self, description, docformat, version=None):
    """
    Update the description of a given format/version of the document.

    @param description: the new description.
    @type description: string
    @param docformat: the format whose description is updated.
    @type docformat: string
    @param version: the version whose description is updated; defaults
        to the latest version.
    @type version: integer
    """
    target = self.get_latest_version() if version is None else version
    self.more_info.set_description(description, normalize_format(docformat), target)
    self.touch()
    self._build_file_list('init')
def set_flag(self, flagname, docformat, version=None):
    """
    Set a flag on a given format/version of the document.

    @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}.
    @type flagname: string
    @param docformat: the format the flag is set for.
    @type docformat: string
    @param version: the version the flag is set for; defaults to the
        latest version.
    @type version: integer
    """
    if version is None:
        version = self.get_latest_version()
    self.more_info.set_flag(flagname, normalize_format(docformat), version)
    self.touch()
    self._build_file_list('init')
def has_flag(self, flagname, docformat, version=None):
    """
    Check whether a flag is set on a given format/version.

    @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}.
    @type flagname: string
    @param docformat: the format the flag is checked for.
    @type docformat: string
    @param version: the version the flag is checked for; defaults to
        the latest version.
    @type version: integer
    @return: True if the flag is set.
    @rtype: bool
    """
    target = self.get_latest_version() if version is None else version
    return self.more_info.has_flag(flagname, normalize_format(docformat), target)
def unset_flag(self, flagname, docformat, version=None):
    """
    Remove a flag from a given format/version of the document.

    @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}.
    @type flagname: string
    @param docformat: the format the flag is removed from.
    @type docformat: string
    @param version: the version the flag is removed from; defaults to
        the latest version.
    @type version: integer
    """
    if version is None:
        version = self.get_latest_version()
    self.more_info.unset_flag(flagname, normalize_format(docformat), version)
    self.touch()
    self._build_file_list('init')
def get_comment(self, docformat, version=None):
    """
    Retrieve the comment of a given format/version of the document.

    @param docformat: the format whose comment is retrieved.
    @type docformat: string
    @param version: the version whose comment is retrieved; defaults to
        the latest version.
    @type version: integer
    @return: the comment.
    @rtype: string
    """
    target = self.get_latest_version() if version is None else version
    return self.more_info.get_comment(normalize_format(docformat), target)
def get_description(self, docformat, version=None):
    """
    Retrieve the description of a given format/version of the document.

    @param docformat: the format whose description is retrieved.
    @type docformat: string
    @param version: the version whose description is retrieved;
        defaults to the latest version.
    @type version: integer
    @return: the description.
    @rtype: string
    """
    target = self.get_latest_version() if version is None else version
    return self.more_info.get_description(normalize_format(docformat), target)
def hidden_p(self, docformat, version=None):
    """
    Tell whether the file of the given format/version carries the
    HIDDEN flag.

    @param docformat: the format to check.
    @type docformat: string
    @param version: the version to check; defaults to the latest
        version.
    @type version: integer
    @return: True if hidden.
    @rtype: bool
    """
    target = self.get_latest_version() if version is None else version
    return self.more_info.has_flag('HIDDEN', docformat, target)
def get_base_dir(self):
    """
    @return: the base directory on the local filesystem for this document
        (e.g. C{/soft/cdsweb/var/data/files/g0/123})
    @rtype: string
    """
    return self.basedir
def get_type(self):
    """
    @return: the doctype of this document.
    @rtype: string
    """
    return self.doctype
def get_id(self):
    """
    @return: the id of this document.
    @rtype: integer
    """
    return self.id
def get_file(self, docformat, version=""):
"""
Returns a L{BibDocFile} instance of this document corresponding to the
specific format and version.
@param format: the specific format.
@type format: string
@param version: the specific version for which the description should
be retrieved. If not specified the last version will be used.
@type version: integer
@return: the L{BibDocFile} instance.
@rtype: BibDocFile
"""
if version == "":
docfiles = self.list_latest_files()
else:
version = int(version)
docfiles = self.list_version_files(version)
docformat = normalize_format(docformat)
for docfile in docfiles:
if (docfile.get_format() == docformat or not docformat):
return docfile
## Let's skip the subformat specification and consider just the
## superformat
superformat = get_superformat_from_format(docformat)
for docfile in docfiles:
if get_superformat_from_format(docfile.get_format()) == superformat:
return docfile
raise InvenioBibDocFileError, "No file for doc %i of format '%s', version '%s'" % (self.id, docformat, version)
def list_versions(self):
"""
@return: the list of existing version numbers for this document.
@rtype: list of integer
"""
versions = []
for docfile in self.docfiles:
if not docfile.get_version() in versions:
versions.append(docfile.get_version())
versions.sort()
return versions
    def delete(self, recid = None):
        """
        Delete this document.
        @param recid: if given, only this record's attachment is renamed;
            otherwise every record in C{self.bibrec_links} is processed.
        @see: L{undelete} for how to undelete the document.
        @raise InvenioBibDocFileError: in case of errors.
        """
        try:
            today = datetime.today()
            recids = []
            if recid:
                recids = [recid]
            else:
                recids = [link["recid"] for link in self.bibrec_links]
            for rid in recids:
                brd = BibRecDocs(rid)
                docname = brd.get_docname(self.id)
                # if the document is attached to some records
                # Rename to DELETED-<YmdHMS><microsecond>-<docname> so the
                # original docname remains recoverable by undelete().
                brd.change_name(docid=self.id, newname = 'DELETED-%s%s-%s' % (today.strftime('%Y%m%d%H%M%S'), today.microsecond, docname))
            run_sql("UPDATE bibdoc SET status='DELETED' WHERE id=%s", (self.id,))
            self.status = 'DELETED'
        except Exception, e:
            register_exception()
            raise InvenioBibDocFileError, "It's impossible to delete bibdoc %s: %s" % (self.id, e)
def deleted_p(self):
"""
@return: True if this document has been deleted.
@rtype: bool
"""
return self.status == 'DELETED'
def empty_p(self):
"""
@return: True if this document is empty, i.e. it has no bibdocfile
connected.
@rtype: bool
"""
return len(self.docfiles) == 0
    def undelete(self, previous_status='', recid=None):
        """
        Undelete a deleted file (only if it was actually deleted via L{delete}).
        The previous C{status}, i.e. the restriction key can be provided.
        Otherwise the undeleted document will be public.
        @param previous_status: the previous status the should be restored.
        @type previous_status: string
        @param recid: if given, the C{DELETED-<date>-} prefix added by
            L{delete} is stripped and the original docname restored for
            that record.
        @raise InvenioBibDocFileError: in case of any error.
        """
        try:
            run_sql("UPDATE bibdoc SET status=%s WHERE id=%s AND status='DELETED'", (previous_status, self.id))
        except Exception, e:
            raise InvenioBibDocFileError, "It's impossible to undelete bibdoc %s: %s" % (self.id, e)
        if recid:
            bibrecdocs = BibRecDocs(recid)
            docname = bibrecdocs.get_docname(self.id)
            if docname.startswith('DELETED-'):
                try:
                    # Let's remove DELETED-20080214144322- in front of the docname
                    original_name = '-'.join(docname.split('-')[2:])
                    # Ensure the restored name does not clash with an
                    # existing docname on the record.
                    original_name = bibrecdocs.propose_unique_docname(original_name)
                    bibrecdocs.change_name(docid=self.id, newname=original_name)
                except Exception, e:
                    raise InvenioBibDocFileError, "It's impossible to restore the previous docname %s. %s kept as docname because: %s" % (original_name, docname, e)
            else:
                raise InvenioBibDocFileError, "Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % docname
    def delete_file(self, docformat, version):
        """
        Delete a specific format/version of this document on the filesystem.
        @param docformat: the particular format to be deleted.
        @type docformat: string
        @param version: the particular version to be deleted.
        @type version: integer
        @note: this operation is not reversible!"""
        try:
            afile = self.get_file(docformat, version)
        except InvenioBibDocFileError:
            # Nothing matches this format/version: nothing to delete.
            return
        try:
            os.remove(afile.get_full_path())
            run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, afile.get_version(), afile.get_format()))
            last_version = run_sql("SELECT max(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id, ))[0][0]
            if last_version:
                ## Updating information about last version
                run_sql("UPDATE bibdocfsinfo SET last_version=true WHERE id_bibdoc=%s AND version=%s", (self.id, last_version))
                run_sql("UPDATE bibdocfsinfo SET last_version=false WHERE id_bibdoc=%s AND version<>%s", (self.id, last_version))
        except OSError:
            # The physical file was already gone; best-effort, ignore.
            pass
        self.touch()
        # Rebuild self.docfiles so it reflects the removal.
        self._build_file_list()
    def get_history(self):
        """
        @return: human readable and parsable strings that represent the
            history of this document, one entry per logged action,
            oldest first.
        @rtype: list of string
        """
        ret = []
        hst = run_sql("""SELECT action, docname, docformat, docversion,
                   docsize, docchecksum, doctimestamp
                   FROM hstDOCUMENT
                   WHERE id_bibdoc=%s ORDER BY doctimestamp ASC""", (self.id, ))
        for row in hst:
            ret.append("%s %s '%s', format: '%s', version: %i, size: %s, checksum: '%s'" % (row[6].strftime('%Y-%m-%d %H:%M:%S'), row[0], row[1], row[2], row[3], nice_size(row[4]), row[5]))
        return ret
def _build_file_list(self, context=''):
"""
Lists all files attached to the bibdoc. This function should be
called everytime the bibdoc is modified.
As a side effect it log everything that has happened to the bibdocfiles
in the log facility, according to the context:
"init": means that the function has been called;
for the first time by a constructor, hence no logging is performed
"": by default means to log every deleted file as deleted and every
added file as added;
"rename": means that every appearently deleted file is logged as
renamef and every new file as renamet.
"""
def log_action(action, docid, docname, docformat, version, size, checksum, timestamp=''):
"""Log an action into the bibdoclog table."""
try:
if timestamp:
run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)', (action, docid, docname, docformat, version, size, checksum, timestamp))
else:
run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, NOW())', (action, docid, docname, docformat, version, size, checksum))
except DatabaseError:
register_exception()
def make_removed_added_bibdocfiles(previous_file_list):
"""Internal function for build the log of changed files."""
# Let's rebuild the previous situation
old_files = {}
for bibdocfile in previous_file_list:
old_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md)
# Let's rebuild the new situation
new_files = {}
for bibdocfile in self.docfiles:
new_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md)
# Let's subtract from added file all the files that are present in
# the old list, and let's add to deleted files that are not present
# added file.
added_files = dict(new_files)
deleted_files = {}
for key, value in old_files.iteritems():
if added_files.has_key(key):
del added_files[key]
else:
deleted_files[key] = value
return (added_files, deleted_files)
if context != ('init', 'init_from_disk'):
previous_file_list = list(self.docfiles)
res = run_sql("SELECT status, creation_date,"
"modification_date FROM bibdoc WHERE id=%s", (self.id,))
self.cd = res[0][1]
self.md = res[0][2]
self.status = res[0][0]
self.more_info = BibDocMoreInfo(self.id)
self.docfiles = []
if CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE and context == 'init':
## In normal init context we read from DB
res = run_sql("SELECT version, format, cd, md, checksum, filesize FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id, ))
for version, docformat, cd, md, checksum, size in res:
filepath = self.get_filepath(docformat, version)
self.docfiles.append(BibDocFile(
filepath, self.bibrec_types,
version, docformat, self.id, self.status, checksum,
self.more_info, human_readable=self.human_readable, cd=cd, md=md, size=size, bibdoc=self))
else:
if os.path.exists(self.basedir):
files = os.listdir(self.basedir)
files.sort()
for afile in files:
if not afile.startswith('.'):
try:
filepath = os.path.join(self.basedir, afile)
dummy, dummy, docformat, fileversion = decompose_file_with_version(filepath)
checksum = self.md5s.get_checksum(afile)
self.docfiles.append(BibDocFile(filepath, self.bibrec_types,
fileversion, docformat,
self.id, self.status, checksum,
self.more_info, human_readable=self.human_readable, bibdoc=self))
except Exception, e:
register_exception()
raise InvenioBibDocFileError, e
if context in ('init', 'init_from_disk'):
return
else:
added_files, deleted_files = make_removed_added_bibdocfiles(previous_file_list)
deletedstr = "DELETED"
addedstr = "ADDED"
if context == 'rename':
deletedstr = "RENAMEDFROM"
addedstr = "RENAMEDTO"
for (docname, docformat, version), (size, checksum, md) in added_files.iteritems():
if context == 'rename':
md = '' # No modification time
log_action(addedstr, self.id, docname, docformat, version, size, checksum, md)
for (docname, docformat, version), (size, checksum, md) in deleted_files.iteritems():
if context == 'rename':
md = '' # No modification time
log_action(deletedstr, self.id, docname, docformat, version, size, checksum, md)
    def _sync_to_db(self):
        """
        Update the content of the bibdocfsinfo table by taking what is
        available on the filesystem.
        """
        # Rescan the filesystem without emitting history log entries.
        self._build_file_list('init_from_disk')
        run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id,))
        for afile in self.docfiles:
            run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, false, %s, %s, %s, %s, %s)", (self.id, afile.get_version(), afile.get_format(), afile.cd, afile.md, afile.get_checksum(), afile.get_size(), afile.mime))
        # Flag the rows belonging to the latest version.
        run_sql("UPDATE bibdocfsinfo SET last_version=true WHERE id_bibdoc=%s AND version=%s", (self.id, self.get_latest_version()))
def _build_related_file_list(self):
"""Lists all files attached to the bibdoc. This function should be
called everytime the bibdoc is modified within e.g. its icon.
@deprecated: use subformats instead.
"""
self.related_files = {}
res = run_sql("SELECT ln.id_bibdoc2,ln.rel_type,bibdoc.status FROM "
"bibdoc_bibdoc AS ln,bibdoc WHERE bibdoc.id=ln.id_bibdoc2 AND "
"ln.id_bibdoc1=%s", (str(self.id),))
for row in res:
docid = row[0]
doctype = row[1]
if row[2] != 'DELETED':
if not self.related_files.has_key(doctype):
self.related_files[doctype] = []
cur_doc = BibDoc.create_instance(docid=docid, human_readable=self.human_readable)
self.related_files[doctype].append(cur_doc)
def get_total_size_latest_version(self):
"""Return the total size used on disk of all the files belonging
to this bibdoc and corresponding to the latest version."""
ret = 0
for bibdocfile in self.list_latest_files():
ret += bibdocfile.get_size()
return ret
def get_total_size(self):
"""Return the total size used on disk of all the files belonging
to this bibdoc."""
ret = 0
for bibdocfile in self.list_all_files():
ret += bibdocfile.get_size()
return ret
def list_all_files(self, list_hidden=True):
"""Returns all the docfiles linked with the given bibdoc."""
if list_hidden:
return self.docfiles
else:
return [afile for afile in self.docfiles if not afile.hidden_p()]
def list_latest_files(self, list_hidden=True):
"""Returns all the docfiles within the last version."""
return self.list_version_files(self.get_latest_version(), list_hidden=list_hidden)
def list_version_files(self, version, list_hidden=True):
"""Return all the docfiles of a particular version."""
version = int(version)
return [docfile for docfile in self.docfiles if docfile.get_version() == version and (list_hidden or not docfile.hidden_p())]
def get_latest_version(self):
""" Returns the latest existing version number for the given bibdoc.
If no file is associated to this bibdoc, returns '0'.
"""
version = 0
for bibdocfile in self.docfiles:
if bibdocfile.get_version() > version:
version = bibdocfile.get_version()
return version
def get_file_number(self):
"""Return the total number of files."""
return len(self.docfiles)
def register_download(self, ip_address, version, docformat, userid=0, recid=0):
"""Register the information about a download of a particular file."""
docformat = normalize_format(docformat)
if docformat[:1] == '.':
docformat = docformat[1:]
docformat = docformat.upper()
if not version:
version = self.get_latest_version()
return run_sql("INSERT DELAYED INTO rnkDOWNLOADS "
"(id_bibrec,id_bibdoc,file_version,file_format,"
"id_user,client_host,download_time) VALUES "
"(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())",
(recid, self.id, version, docformat,
userid, ip_address,))
def get_incoming_relations(self, rel_type=None):
"""Return all relations in which this BibDoc appears on target position
@param rel_type: Type of the relation, to which we want to limit our search. None = any type
@type rel_type: string
@return: List of BibRelation instances
@rtype: list
"""
return BibRelation.get_relations(rel_type = rel_type,
bibdoc2_id = self.id)
    def get_outgoing_relations(self, rel_type=None):
        """Return all relations in which this BibDoc appears on source
        (first) position.
        @param rel_type: Type of the relation, to which we want to limit our search. None = any type
        @type rel_type: string
        @return: List of BibRelation instances
        @rtype: list
        """
        return BibRelation.get_relations(rel_type = rel_type,
                                         bibdoc1_id = self.id)
def create_outgoing_relation(self, bibdoc2, rel_type):
"""
Create an outgoing relation between current BibDoc and a different one
"""
return BibRelation.create(bibdoc1_id = self.id, bibdoc2_id = bibdoc2.id, rel_type = rel_type)
    def create_incoming_relation(self, bibdoc1, rel_type):
        """
        Create an incoming relation between a particular version of
        another BibDoc (source) and a particular version of the current
        BibDoc (target)
        """
        return BibRelation.create(bibdoc1_id = bibdoc1.id, bibdoc2_id = self.id, rel_type = rel_type)
def generic_path2bidocfile(fullpath):
    """
    Returns a BibDocFile objects that wraps the given fullpath.
    @note: the object will contain the minimum information that can be
        guessed from the fullpath (e.g. docname, format, subformat,
        version, md5, creation_date, modification_date). It won't contain
        for example a comment, a description, a doctype, a restriction.
    """
    fullpath = os.path.abspath(fullpath)
    try:
        path, name, docformat, version = decompose_file_with_version(fullpath)
    except ValueError:
        ## The filename carries no version suffix.
        version = 0
        path, name, docformat = decompose_file(fullpath)
    checksum = Md5Folder(path).get_checksum(os.path.basename(fullpath))
    return BibDocFile(fullpath=fullpath,
        recid_doctypes=[(0, None, name)],
        version=version,
        docformat=docformat,
        docid=0,
        status=None,
        checksum=checksum,
        more_info=None)
class BibDocFile(object):
"""This class represents a physical file in the Invenio filesystem.
It should never be instantiated directly"""
def __init__(self, fullpath, recid_doctypes, version, docformat, docid, status, checksum, more_info=None, human_readable=False, cd=None, md=None, size=None, bibdoc = None):
self.fullpath = os.path.abspath(fullpath)
self.docid = docid
self.recids_doctypes = recid_doctypes
self.version = version
self.status = status
self.checksum = checksum
self.human_readable = human_readable
self.name = recid_doctypes[0][2]
self.bibdoc = bibdoc
if more_info:
self.description = more_info.get_description(docformat, version)
self.comment = more_info.get_comment(docformat, version)
self.flags = more_info.get_flags(docformat, version)
else:
self.description = None
self.comment = None
self.flags = []
self.format = normalize_format(docformat)
self.superformat = get_superformat_from_format(self.format)
self.subformat = get_subformat_from_format(self.format)
if docformat:
self.recids_doctypes = [(a,b,c+self.superformat) for (a,b,c) in self.recids_doctypes]
self.mime, self.encoding = _mimes.guess_type(self.recids_doctypes[0][2])
if self.mime is None:
self.mime = "application/octet-stream"
self.more_info = more_info
self.hidden = 'HIDDEN' in self.flags
self.size = size or os.path.getsize(fullpath)
self.md = md or datetime.fromtimestamp(os.path.getmtime(fullpath))
try:
self.cd = cd or datetime.fromtimestamp(os.path.getctime(fullpath))
except OSError:
self.cd = self.md
self.dir = os.path.dirname(fullpath)
if self.subformat:
self.url = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recids_doctypes[0][0], self.name, self.superformat), {'subformat' : self.subformat})
self.fullurl = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recids_doctypes[0][0], self.name, self.superformat), {'subformat' : self.subformat, 'version' : self.version})
else:
self.url = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recids_doctypes[0][0], self.name, self.superformat), {})
self.fullurl = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recids_doctypes[0][0], self.name, self.superformat), {'version' : self.version})
self.etag = '"%i%s%i"' % (self.docid, self.format, self.version)
self.magic = None
def __repr__(self):
return ('BibDocFile(%s, %i, %s, %s, %i, %i, %s, %s, %s, %s)' % (repr(self.fullpath), self.version, repr(self.name), repr(self.format), self.recids_doctypes[0][0], self.docid, repr(self.status), repr(self.checksum), repr(self.more_info), repr(self.human_readable)))
def format_recids(self):
if self.bibdoc:
return self.bibdoc.format_recids()
return "0"
def __str__(self):
recids = self.format_recids()
out = '%s:%s:%s:%s:fullpath=%s\n' % (recids, self.docid, self.version, self.format, self.fullpath)
out += '%s:%s:%s:%s:name=%s\n' % (recids, self.docid, self.version, self.format, self.name)
out += '%s:%s:%s:%s:subformat=%s\n' % (recids, self.docid, self.version, self.format, get_subformat_from_format(self.format))
out += '%s:%s:%s:%s:status=%s\n' % (recids, self.docid, self.version, self.format, self.status)
out += '%s:%s:%s:%s:checksum=%s\n' % (recids, self.docid, self.version, self.format, self.checksum)
if self.human_readable:
out += '%s:%s:%s:%s:size=%s\n' % (recids, self.docid, self.version, self.format, nice_size(self.size))
else:
out += '%s:%s:%s:%s:size=%s\n' % (recids, self.docid, self.version, self.format, self.size)
out += '%s:%s:%s:%s:creation time=%s\n' % (recids, self.docid, self.version, self.format, self.cd)
out += '%s:%s:%s:%s:modification time=%s\n' % (recids, self.docid, self.version, self.format, self.md)
out += '%s:%s:%s:%s:magic=%s\n' % (recids, self.docid, self.version, self.format, self.get_magic())
out += '%s:%s:%s:%s:mime=%s\n' % (recids, self.docid, self.version, self.format, self.mime)
out += '%s:%s:%s:%s:encoding=%s\n' % (recids, self.docid, self.version, self.format, self.encoding)
out += '%s:%s:%s:%s:url=%s\n' % (recids, self.docid, self.version, self.format, self.url)
out += '%s:%s:%s:%s:fullurl=%s\n' % (recids, self.docid, self.version, self.format, self.fullurl)
out += '%s:%s:%s:%s:description=%s\n' % (recids, self.docid, self.version, self.format, self.description)
out += '%s:%s:%s:%s:comment=%s\n' % (recids, self.docid, self.version, self.format, self.comment)
out += '%s:%s:%s:%s:hidden=%s\n' % (recids, self.docid, self.version, self.format, self.hidden)
out += '%s:%s:%s:%s:flags=%s\n' % (recids, self.docid, self.version, self.format, self.flags)
out += '%s:%s:%s:%s:etag=%s\n' % (recids, self.docid, self.version, self.format, self.etag)
return out
def is_restricted(self, user_info):
"""Returns restriction state. (see acc_authorize_action return values)"""
if self.status not in ('', 'DELETED'):
return check_bibdoc_authorization(user_info, status=self.status)
elif self.status == 'DELETED':
return (1, 'File has ben deleted')
else:
return (0, '')
def is_icon(self, subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE):
"""
@param subformat_re: by default the convention is that
L{CFG_BIBDOCFILE_ICON_SUBFORMAT_RE} is used as a subformat indicator to
mean that a particular format is to be used as an icon.
Specifiy a different subformat if you need to use a different
convention.
@type subformat: compiled regular expression
@return: True if this file is an icon.
@rtype: bool
"""
return bool(subformat_re.match(self.subformat))
def hidden_p(self):
return self.hidden
def get_url(self):
return self.url
def get_type(self):
"""Returns the first type connected with the bibdoc of this file."""
return self.recids_doctypes[0][1]
def get_path(self):
return self.fullpath
def get_bibdocid(self):
return self.docid
def get_name(self):
return self.name
def get_full_name(self):
"""Returns the first name connected with the bibdoc of this file."""
return self.recids_doctypes[0][2]
def get_full_path(self):
return self.fullpath
def get_format(self):
return self.format
def get_subformat(self):
return self.subformat
def get_superformat(self):
return self.superformat
def get_size(self):
return self.size
def get_version(self):
return self.version
def get_checksum(self):
return self.checksum
def get_description(self):
return self.description
def get_comment(self):
return self.comment
def get_content(self):
"""Returns the binary content of the file."""
content_fd = open(self.fullpath, 'rb')
content = content_fd.read()
content_fd.close()
return content
def get_recid(self):
"""Returns the first recid connected with the bibdoc of this file."""
return self.recids_doctypes[0][0]
def get_status(self):
"""Returns the status of the file, i.e. either '', 'DELETED' or a
restriction keyword."""
return self.status
def get_magic(self):
"""Return all the possible guesses from the magic library about
the content of the file."""
if self.magic is None:
if CFG_HAS_MAGIC == 1:
magic_cookies = _get_magic_cookies()
magic_result = []
for key in magic_cookies.keys():
magic_result.append(magic_cookies[key].file(self.fullpath))
self.magic = tuple(magic_result)
elif CFG_HAS_MAGIC == 2:
magic_result = []
for key in ({'mime': False, 'mime_encoding': False},
{'mime': True, 'mime_encoding': False},
{'mime': False, 'mime_encoding': True}):
magic_result.append(_magic_wrapper(self.fullpath, **key))
self.magic = tuple(magic_result)
return self.magic
def check(self):
"""Return True if the checksum corresponds to the file."""
return calculate_md5(self.fullpath) == self.checksum
def stream(self, req, download=False):
"""Stream the file. Note that no restriction check is being
done here, since restrictions have been checked previously
inside websubmit_webinterface.py."""
if os.path.exists(self.fullpath):
if random.random() < CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY and calculate_md5(self.fullpath) != self.checksum:
raise InvenioBibDocFileError, "File %s, version %i, is corrupted!" % (self.recids_doctypes[0][2], self.version)
stream_file(req, self.fullpath, "%s%s" % (self.name, self.superformat), self.mime, self.encoding, self.etag, self.checksum, self.fullurl, download=download)
raise apache.SERVER_RETURN, apache.DONE
else:
req.status = apache.HTTP_NOT_FOUND
raise InvenioBibDocFileError, "%s does not exists!" % self.fullpath
## Parses "auth_type: auth_value" document status strings used by
## check_bibdoc_authorization (case-insensitive; value may span lines).
_RE_STATUS_PARSER = re.compile(r'^(?P<type>email|group|egroup|role|firerole|status):\s*(?P<value>.*)$', re.S + re.I)
def check_bibdoc_authorization(user_info, status):
"""
Check if the user is authorized to access a document protected with the given status.
L{status} is a string of the form::
auth_type: auth_value
where C{auth_type} can have values in::
email, group, role, firerole, status
and C{auth_value} has a value interpreted againsta C{auth_type}:
- C{email}: the user can access the document if his/her email matches C{auth_value}
- C{group}: the user can access the document if one of the groups (local or
external) of which he/she is member matches C{auth_value}
- C{role}: the user can access the document if he/she belongs to the WebAccess
role specified in C{auth_value}
- C{firerole}: the user can access the document if he/she is implicitly matched
by the role described by the firewall like role definition in C{auth_value}
- C{status}: the user can access the document if he/she is authorized to
for the action C{viewrestrdoc} with C{status} paramter having value
C{auth_value}
@note: If no C{auth_type} is specified or if C{auth_type} is not one of the
above, C{auth_value} will be set to the value contained in the
parameter C{status}, and C{auth_type} will be considered to be C{status}.
@param user_info: the user_info dictionary
@type: dict
@param status: the status of the document.
@type status: string
@return: a tuple, of the form C{(auth_code, auth_message)} where auth_code is 0
if the authorization is granted and greater than 0 otherwise.
@rtype: (int, string)
@raise ValueError: in case of unexpected parsing error.
"""
if not status:
return (0, CFG_WEBACCESS_WARNING_MSGS[0])
def parse_status(status):
g = _RE_STATUS_PARSER.match(status)
if g:
return (g.group('type').lower(), g.group('value'))
else:
return ('status', status)
if acc_is_user_in_role(user_info, acc_get_role_id(SUPERADMINROLE)):
return (0, CFG_WEBACCESS_WARNING_MSGS[0])
auth_type, auth_value = parse_status(status)
if auth_type == 'status':
return acc_authorize_action(user_info, 'viewrestrdoc', status=auth_value)
elif auth_type == 'email':
if not auth_value.lower().strip() == user_info['email'].lower().strip():
return (1, 'You must be member of the group %s in order to access this document' % repr(auth_value))
elif auth_type == 'group':
if not auth_value in user_info['group']:
return (1, 'You must be member of the group %s in order to access this document' % repr(auth_value))
elif auth_type == 'role':
if not acc_is_user_in_role(user_info, acc_get_role_id(auth_value)):
return (1, 'You must be member in the role %s in order to access this document' % repr(auth_value))
elif auth_type == 'firerole':
if not acc_firerole_check_user(user_info, compile_role_definition(auth_value)):
return (1, 'You must be authorized in order to access this document')
else:
raise ValueError, 'Unexpected authorization type %s for %s' % (repr(auth_type), repr(auth_value))
return (0, CFG_WEBACCESS_WARNING_MSGS[0])
## TODO for future reimplementation of stream_file
#class StreamFileException(Exception):
# def __init__(self, value):
# self.value = value
## Extracts the MSIE version from a User-Agent header; versions below 9.0
## get Content-Disposition/mime workarounds in stream_file().
_RE_BAD_MSIE = re.compile("MSIE\s+(\d+\.\d+)")
def stream_file(req, fullpath, fullname=None, mime=None, encoding=None, etag=None, md5str=None, location=None, download=False):
"""This is a generic function to stream a file to the user.
If fullname, mime, encoding, and location are not provided they will be
guessed based on req and fullpath.
md5str should be passed as an hexadecimal string.
"""
## TODO for future reimplementation of stream_file
# from flask import send_file
# if fullname is None:
# fullname = fullpath.split('/')[-1]
# response = send_file(fullpath,
# attachment_filename=fullname.replace('"', '\\"'),
# as_attachment=False)
# if not download:
# response.headers['Content-Disposition'] = 'inline; filename="%s"' % fullname.replace('"', '\\"')
#
# raise StreamFileException(response)
def normal_streaming(size):
req.set_content_length(size)
req.send_http_header()
if req.method != 'HEAD':
req.sendfile(fullpath)
return ""
def single_range(size, the_range):
req.set_content_length(the_range[1])
req.headers_out['Content-Range'] = 'bytes %d-%d/%d' % (the_range[0], the_range[0] + the_range[1] - 1, size)
req.status = apache.HTTP_PARTIAL_CONTENT
req.send_http_header()
if req.method != 'HEAD':
req.sendfile(fullpath, the_range[0], the_range[1])
return ""
def multiple_ranges(size, ranges, mime):
req.status = apache.HTTP_PARTIAL_CONTENT
boundary = '%s%04d' % (time.strftime('THIS_STRING_SEPARATES_%Y%m%d%H%M%S'), random.randint(0, 9999))
req.content_type = 'multipart/byteranges; boundary=%s' % boundary
content_length = 0
for arange in ranges:
content_length += len('--%s\r\n' % boundary)
content_length += len('Content-Type: %s\r\n' % mime)
content_length += len('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size))
content_length += len('\r\n')
content_length += arange[1]
content_length += len('\r\n')
content_length += len('--%s--\r\n' % boundary)
req.set_content_length(content_length)
req.send_http_header()
if req.method != 'HEAD':
for arange in ranges:
req.write('--%s\r\n' % boundary, 0)
req.write('Content-Type: %s\r\n' % mime, 0)
req.write('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size), 0)
req.write('\r\n', 0)
req.sendfile(fullpath, arange[0], arange[1])
req.write('\r\n', 0)
req.write('--%s--\r\n' % boundary)
req.flush()
return ""
def parse_date(date):
"""According to <http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3>
a date can come in three formats (in order of preference):
Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
Moreover IE is adding some trailing information after a ';'.
Wrong dates should be simpled ignored.
This function return the time in seconds since the epoch GMT or None
in case of errors."""
if not date:
return None
try:
date = date.split(';')[0].strip() # Because of IE
## Sun, 06 Nov 1994 08:49:37 GMT
return time.mktime(time.strptime(date, '%a, %d %b %Y %X %Z'))
except:
try:
## Sun, 06 Nov 1994 08:49:37 GMT
return time.mktime(time.strptime(date, '%A, %d-%b-%y %H:%M:%S %Z'))
except:
try:
## Sun, 06 Nov 1994 08:49:37 GMT
return time.mktime(date)
except:
return None
def parse_ranges(ranges):
"""According to <http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35>
a (multiple) range request comes in the form:
bytes=20-30,40-60,70-,-80
with the meaning:
from byte to 20 to 30 inclusive (11 bytes)
from byte to 40 to 60 inclusive (21 bytes)
from byte 70 to (size - 1) inclusive (size - 70 bytes)
from byte size - 80 to (size - 1) inclusive (80 bytes)
This function will return the list of ranges in the form:
[[first_byte, last_byte], ...]
If first_byte or last_byte aren't specified they'll be set to None
If the list is not well formatted it will return None
"""
try:
if ranges.startswith('bytes') and '=' in ranges:
ranges = ranges.split('=')[1].strip()
else:
return None
ret = []
for arange in ranges.split(','):
arange = arange.strip()
if arange.startswith('-'):
ret.append([None, int(arange[1:])])
elif arange.endswith('-'):
ret.append([int(arange[:-1]), None])
else:
ret.append(map(int, arange.split('-')))
return ret
except:
return None
def parse_tags(tags):
"""Return a list of tags starting from a comma separated list."""
return [tag.strip() for tag in tags.split(',')]
def fix_ranges(ranges, size):
"""Complementary to parse_ranges it will transform all the ranges
into (first_byte, length), adjusting all the value based on the
actual size provided.
"""
ret = []
for arange in ranges:
if (arange[0] is None and arange[1] > 0) or arange[0] < size:
if arange[0] is None:
arange[0] = size - arange[1]
elif arange[1] is None:
arange[1] = size - arange[0]
else:
arange[1] = arange[1] - arange[0] + 1
arange[0] = max(0, arange[0])
arange[1] = min(size - arange[0], arange[1])
if arange[1] > 0:
ret.append(arange)
return ret
def get_normalized_headers():
    """Strip and lowerize all the keys of the headers dictionary plus
    strip, lowerize and transform known headers value into their value."""
    # Pre-seed the conditional headers this module cares about so that
    # lookups never raise KeyError even when the client omitted them.
    ret = {
        'if-match' : None,
        'unless-modified-since' : None,
        'if-modified-since' : None,
        'range' : None,
        'if-range' : None,
        'if-none-match' : None,
    }
    # NOTE: ``req`` comes from the enclosing scope (the mod_python request).
    for key, value in req.headers_in.iteritems():
        key = key.strip().lower()
        value = value.strip()
        if key in ('unless-modified-since', 'if-modified-since'):
            # Date headers become epoch seconds (or None if unparsable).
            value = parse_date(value)
        elif key == 'range':
            value = parse_ranges(value)
        elif key == 'if-range':
            # If-Range may carry either an HTTP date or an entity tag.
            value = parse_date(value) or parse_tags(value)
        elif key in ('if-match', 'if-none-match'):
            value = parse_tags(value)
        # Falsy parse results (None, [], 0) leave the pre-seeded None.
        if value:
            ret[key] = value
    return ret
headers = get_normalized_headers()
g = _RE_BAD_MSIE.search(headers.get('user-agent', "MSIE 6.0"))
bad_msie = g and float(g.group(1)) < 9.0
if CFG_BIBDOCFILE_USE_XSENDFILE:
## If XSendFile is supported by the server, let's use it.
if os.path.exists(fullpath):
if fullname is None:
fullname = os.path.basename(fullpath)
if bad_msie:
## IE is confused by quotes
req.headers_out["Content-Disposition"] = 'attachment; filename=%s' % fullname.replace('"', '\\"')
elif download:
req.headers_out["Content-Disposition"] = 'attachment; filename="%s"' % fullname.replace('"', '\\"')
else:
## IE is confused by inline
req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"')
req.headers_out["X-Sendfile"] = fullpath
if mime is None:
(mime, encoding) = _mimes.guess_type(fullpath)
if mime is None:
mime = "application/octet-stream"
if not bad_msie:
## IE is confused by not supported mimetypes
req.content_type = mime
return ""
else:
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
if headers['if-match']:
if etag is not None and etag not in headers['if-match']:
raise apache.SERVER_RETURN, apache.HTTP_PRECONDITION_FAILED
if os.path.exists(fullpath):
mtime = os.path.getmtime(fullpath)
if fullname is None:
fullname = os.path.basename(fullpath)
if mime is None:
(mime, encoding) = _mimes.guess_type(fullpath)
if mime is None:
mime = "application/octet-stream"
if location is None:
location = req.uri
if not bad_msie:
## IE is confused by not supported mimetypes
req.content_type = mime
req.encoding = encoding
req.filename = fullname
req.headers_out["Last-Modified"] = time.strftime('%a, %d %b %Y %X GMT', time.gmtime(mtime))
if CFG_ENABLE_HTTP_RANGE_REQUESTS:
req.headers_out["Accept-Ranges"] = "bytes"
else:
req.headers_out["Accept-Ranges"] = "none"
req.headers_out["Content-Location"] = location
if etag is not None:
req.headers_out["ETag"] = etag
if md5str is not None:
req.headers_out["Content-MD5"] = base64.encodestring(binascii.unhexlify(md5str.upper()))[:-1]
if bad_msie:
## IE is confused by quotes
req.headers_out["Content-Disposition"] = 'attachment; filename=%s' % fullname.replace('"', '\\"')
elif download:
req.headers_out["Content-Disposition"] = 'attachment; filename="%s"' % fullname.replace('"', '\\"')
else:
## IE is confused by inline
req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"')
size = os.path.getsize(fullpath)
if not size:
try:
raise Exception, '%s exists but is empty' % fullpath
except Exception:
register_exception(req=req, alert_admin=True)
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
if headers['if-modified-since'] and headers['if-modified-since'] >= mtime:
raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED
if headers['if-none-match']:
if etag is not None and etag in headers['if-none-match']:
raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED
if headers['unless-modified-since'] and headers['unless-modified-since'] < mtime:
return normal_streaming(size)
if CFG_ENABLE_HTTP_RANGE_REQUESTS and headers['range']:
try:
if headers['if-range']:
if etag is None or etag not in headers['if-range']:
return normal_streaming(size)
ranges = fix_ranges(headers['range'], size)
except:
return normal_streaming(size)
if len(ranges) > 1:
return multiple_ranges(size, ranges, mime)
elif ranges:
return single_range(size, ranges[0])
else:
raise apache.SERVER_RETURN, apache.HTTP_RANGE_NOT_SATISFIABLE
else:
return normal_streaming(size)
else:
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
def stream_restricted_icon(req):
    """Return the content of the "Restricted Icon" file."""
    stream_file(req, '%s/img/restricted.gif' % CFG_WEBDIR)
    # Tell mod_python the request has been fully handled.
    raise apache.SERVER_RETURN, apache.DONE
#def list_versions_from_array(docfiles):
# """Retrieve the list of existing versions from the given docfiles list."""
# versions = []
# for docfile in docfiles:
# if not docfile.get_version() in versions:
# versions.append(docfile.get_version())
# versions.sort()
# versions.reverse()
# return versions
def _make_base_dir(docid):
    """Given a docid it returns the complete path that should host its files."""
    # Documents are sharded into "gN" directories of fixed capacity.
    group_number = int(int(docid) / CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT)
    group_dir = "g" + str(group_number)
    return os.path.join(CFG_BIBDOCFILE_FILEDIR, group_dir, str(docid))
class Md5Folder(object):
    """Manage all the Md5 checksum about a folder"""
    def __init__(self, folder):
        """Initialize the class from the md5 checksum of a given path"""
        self.folder = folder
        # Populate self.md5s from the .md5 file (or compute it afresh).
        self.load()

    def update(self, only_new=True):
        """Update the .md5 file with the current files. If only_new
        is specified then only not already calculated file are calculated."""
        if not only_new:
            self.md5s = {}
        if os.path.exists(self.folder):
            for filename in os.listdir(self.folder):
                # Hidden files (including .md5 itself) are never checksummed.
                if filename not in self.md5s and not filename.startswith('.'):
                    self.md5s[filename] = calculate_md5(os.path.join(self.folder, filename))
        self.store()

    def store(self):
        """Store the current md5 dictionary into .md5"""
        try:
            # Temporarily relax the umask so .md5 is world-readable.
            old_umask = os.umask(022)
            md5file = open(os.path.join(self.folder, ".md5"), "w")
            for key, value in self.md5s.items():
                # "<md5hash> *<filename>" mirrors md5sum's binary-mode format.
                md5file.write('%s *%s\n' % (value, key))
            md5file.close()
            os.umask(old_umask)
        except Exception, e:
            register_exception(alert_admin=True)
            raise InvenioBibDocFileError("Encountered an exception while storing .md5 for folder '%s': '%s'" % (self.folder, e))

    def load(self):
        """Load .md5 into the md5 dictionary"""
        self.md5s = {}
        md5_path = os.path.join(self.folder, ".md5")
        if os.path.exists(md5_path):
            for row in open(md5_path, "r"):
                # Fixed-width parse of "<32 hex chars> *<filename>".
                md5hash = row[:32]
                filename = row[34:].strip()
                self.md5s[filename] = md5hash
        else:
            # No .md5 file yet: compute the checksums from scratch.
            self.update()

    def check(self, filename=''):
        """Check the specified file or all the files for which it exists a hash
        for being coherent with the stored hash."""
        if filename and filename in self.md5s.keys():
            try:
                return self.md5s[filename] == calculate_md5(os.path.join(self.folder, filename))
            except Exception, e:
                register_exception(alert_admin=True)
                raise InvenioBibDocFileError("Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e))
        else:
            # No (known) filename given: verify every stored checksum.
            for filename, md5hash in self.md5s.items():
                try:
                    if calculate_md5(os.path.join(self.folder, filename)) != md5hash:
                        return False
                except Exception, e:
                    register_exception(alert_admin=True)
                    raise InvenioBibDocFileError("Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e))
            return True

    def get_checksum(self, filename):
        """Return the checksum of a physical file."""
        md5hash = self.md5s.get(filename, None)
        if md5hash is None:
            # File not hashed yet (e.g. added after load): refresh first.
            self.update()
            # Now it should not fail!
            md5hash = self.md5s[filename]
        return md5hash
def calculate_md5_external(filename):
    """Calculate the md5 of a physical file through md5sum Command Line Tool.
    This is suitable for file larger than 256Kb.

    @param filename: the path of the file to hash
    @return: the 32-character hexadecimal md5 digest
    @raise InvenioBibDocFileError: if running the external tool fails
    """
    try:
        # -b: binary mode, so the output format is stable across platforms.
        md5_result = os.popen(CFG_PATH_MD5SUM + ' -b %s' % escape_shell_arg(filename))
        ret = md5_result.read()[:32]
        md5_result.close()
        if len(ret) != 32:
            # Error in running md5sum. Let's fallback to internal
            # algorithm.
            return calculate_md5(filename, force_internal=True)
        else:
            return ret
    except Exception, e:
        raise InvenioBibDocFileError("Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e))
def calculate_md5(filename, force_internal=False):
    """Calculate the md5 of a physical file. This is suitable for files smaller
    than 256Kb.

    @param filename: the path of the file to hash
    @param force_internal: if True, never shell out to md5sum
    @return: the 32-character hexadecimal md5 digest
    @raise InvenioBibDocFileError: if the file cannot be read
    """
    # Use the in-process implementation for small files (or when md5sum is
    # unavailable); delegate big files to the external md5sum tool.
    if not CFG_PATH_MD5SUM or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD:
        try:
            to_be_read = open(filename, "rb")
            computed_md5 = md5()
            # Read in fixed-size chunks to keep memory usage bounded.
            while True:
                buf = to_be_read.read(CFG_BIBDOCFILE_MD5_BUFFER)
                if buf:
                    computed_md5.update(buf)
                else:
                    break
            to_be_read.close()
            return computed_md5.hexdigest()
        except Exception, e:
            register_exception(alert_admin=True)
            raise InvenioBibDocFileError("Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e))
    else:
        return calculate_md5_external(filename)
def bibdocfile_url_to_bibrecdocs(url):
    """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns
    a BibRecDocs object for the corresponding recid."""
    recid, _dummy_docname, _dummy_format = decompose_bibdocfile_url(url)
    return BibRecDocs(recid)
def bibdocfile_url_to_bibdoc(url):
    """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns
    a BibDoc object for the corresponding recid/docname."""
    recdocs = bibdocfile_url_to_bibrecdocs(url)
    docname = decompose_bibdocfile_url(url)[1]
    return recdocs.get_bibdoc(docname)
def bibdocfile_url_to_bibdocfile(url):
    """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns
    a BibDocFile object for the corresponding recid/docname/format."""
    bibdoc = bibdocfile_url_to_bibdoc(url)
    docformat = decompose_bibdocfile_url(url)[2]
    return bibdoc.get_file(docformat)
def bibdocfile_url_to_fullpath(url):
    """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns
    the fullpath for the corresponding recid/docname/format."""
    docfile = bibdocfile_url_to_bibdocfile(url)
    return docfile.get_full_path()
def bibdocfile_url_p(url):
    """Return True when the url is a potential valid url pointing to a
    fulltext owned by a system."""
    # Very old /getfile.py URLs are always considered candidates.
    legacy_prefixes = ('%s/getfile.py' % CFG_SITE_URL,
                       '%s/getfile.py' % CFG_SITE_SECURE_URL)
    if url.startswith(legacy_prefixes):
        return True
    record_prefixes = ('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD),
                       '%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD))
    if not url.startswith(record_prefixes):
        return False
    # A well-formed file URL contains exactly one '/files/' separator
    # with non-empty text on both sides.
    parts = url.split('/files/')
    return len(parts) == 2 and parts[0] != '' and parts[1] != ''
def get_docid_from_bibdocfile_fullpath(fullpath):
    """Given a bibdocfile fullpath (e.g. "CFG_BIBDOCFILE_FILEDIR/g0/123/bar.pdf;1")
    returns the docid (e.g. 123)."""
    if not fullpath.startswith(os.path.join(CFG_BIBDOCFILE_FILEDIR, 'g')):
        raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath
    # The docid is the last component of the directory containing the file.
    dirname = decompose_file_with_version(fullpath)[0]
    try:
        return int(dirname.split('/')[-1])
    except:
        raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath
def decompose_bibdocfile_fullpath(fullpath):
    """Given a bibdocfile fullpath (e.g. "CFG_BIBDOCFILE_FILEDIR/g0/123/bar.pdf;1")
    returns a dictionary with keys "doc_id", "extension" and "version".

    NOTE: despite the historical wording elsewhere, no recid/docname is
    returned here -- only what can be derived from the path itself.
    """
    if not fullpath.startswith(os.path.join(CFG_BIBDOCFILE_FILEDIR, 'g')):
        raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath
    dirname, dummy, extension, version = decompose_file_with_version(fullpath)
    try:
        # The docid is the last component of the containing directory.
        docid = int(dirname.split('/')[-1])
        return {"doc_id" : docid, "extension": extension, "version": version}
    except:
        raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath
def decompose_bibdocfile_url(url):
    """Given a bibdocfile_url return a triple (recid, docname, format)."""
    # Legacy /getfile.py?... URLs have their own decomposer.
    if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL):
        return decompose_bibdocfile_very_old_url(url)
    # Strip the site prefix (http or https variant) up to the record part.
    if url.startswith('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)):
        recid_file = url[len('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)):]
    elif url.startswith('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD)):
        recid_file = url[len('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD)):]
    else:
        raise InvenioBibDocFileError, "Url %s doesn't correspond to a valid record inside the system." % url
    # Turn "recid/files/docname.ext" into "recid/docname.ext" so that
    # decompose_file can treat it like a plain path.
    recid_file = recid_file.replace('/files/', '/')
    recid, docname, docformat = decompose_file(urllib.unquote(recid_file)) # this will work in the case of URL... not file !
    if not recid and docname.isdigit():
        ## If the URL was something similar to CFG_SITE_URL/CFG_SITE_RECORD/123
        return (int(docname), '', '')
    return (int(recid), docname, docformat)
# Matches the "/<record>/<recid>/files/" part of an old-style URL,
# capturing the (possibly empty) recid digits.
re_bibdocfile_old_url = re.compile(r'/%s/(\d*)/files/' % CFG_SITE_RECORD)
def decompose_bibdocfile_old_url(url):
    """Given a bibdocfile old url (e.g. CFG_SITE_URL/CFG_SITE_RECORD/123/files)
    it returns the recid."""
    g = re_bibdocfile_old_url.search(url)
    if g:
        return int(g.group(1))
    raise InvenioBibDocFileError('%s is not a valid old bibdocfile url' % url)
def decompose_bibdocfile_very_old_url(url):
    """Decompose an old /getfile.py? URL

    Returns a triple (recid, docname, docformat) extracted from the
    query-string parameters (docid or recid/name, plus format).
    @raise InvenioBibDocFileError: if the URL cannot be decomposed.
    """
    if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL):
        params = urllib.splitquery(url)[1]
        if params:
            try:
                params = cgi.parse_qs(params)
                if 'docid' in params:
                    # docid-style URL: resolve recid/docname via the BibDoc.
                    docid = int(params['docid'][0])
                    bibdoc = BibDoc.create_instance(docid)
                    if bibdoc.bibrec_links:
                        # Use the first attached record.
                        recid = bibdoc.bibrec_links[0]["rec_id"]
                        docname = bibdoc.bibrec_links[0]["doc_name"]
                    else:
                        raise InvenioBibDocFileError("Old style URL pointing to an unattached document")
                elif 'recid' in params:
                    # recid-style URL, with an optional document name.
                    recid = int(params['recid'][0])
                    if 'name' in params:
                        docname = params['name'][0]
                    else:
                        docname = ''
                else:
                    raise InvenioBibDocFileError('%s has not enough params to correspond to a bibdocfile.' % url)
                docformat = normalize_format(params.get('format', [''])[0])
                return (recid, docname, docformat)
            except Exception, e:
                raise InvenioBibDocFileError('Problem with %s: %s' % (url, e))
        else:
            raise InvenioBibDocFileError('%s has no params to correspond to a bibdocfile.' % url)
    else:
        raise InvenioBibDocFileError('%s is not a valid very old bibdocfile url' % url)
def get_docname_from_url(url):
    """Return a potential docname given a url"""
    # Take the path component of the URL and drop the extension.
    url_path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
    return file_strip_ext(os.path.basename(url_path))
def get_format_from_url(url):
    """Return a potential format given a url"""
    # The format is whatever follows the extension-stripped stem.
    url_path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
    tail = os.path.split(url_path)[-1]
    stem_length = len(file_strip_ext(tail))
    return tail[stem_length:]
def clean_url(url):
    """Given a local url e.g. a local path it render it a realpath."""
    # Remote URLs pass through untouched.
    if not is_url_a_local_file(url):
        return url
    local_path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
    return os.path.abspath(local_path)
def is_url_a_local_file(url):
    """Return True if the given URL is pointing to a local file."""
    # An empty scheme means a bare filesystem path.
    scheme = urllib2.urlparse.urlsplit(url)[0]
    return scheme == '' or scheme == 'file'
def check_valid_url(url):
    """
    Check for validity of a url or a file.

    @param url: the URL to check
    @type url: string
    @raise StandardError: if the URL is not a valid URL.
    """
    try:
        if is_url_a_local_file(url):
            path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2]
            # Reject relative or non-canonical paths outright.
            if os.path.abspath(path) != path:
                raise StandardError, "%s is not a normalized path (would be %s)." % (path, os.path.normpath(path))
            for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_WEBSUBMIT_STORAGEDIR]:
                if path.startswith(allowed_path):
                    # Probe readability by opening (and closing) the file.
                    dummy_fd = open(path)
                    dummy_fd.close()
                    return
            raise StandardError, "%s is not in one of the allowed paths." % path
        else:
            try:
                open_url(url)
            except InvenioBibdocfileUnauthorizedURL, e:
                raise StandardError, str(e)
    except Exception, e:
        raise StandardError, "%s is not a correct url: %s" % (url, e)
def safe_mkstemp(suffix, prefix='bibdocfile_'):
    """Create a temporary filename that don't have any '.' inside a part
    from the suffix."""
    def _create():
        # Close the file and leave the responsability to the client code to
        # correctly open/close it.
        fd, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR)
        os.close(fd)
        return path

    tmppath = _create()
    if '.' not in suffix:
        # Just in case format is empty
        return tmppath
    # Retry until the random part of the name contains no dot.
    while '.' in os.path.basename(tmppath)[:-len(suffix)]:
        os.remove(tmppath)
        tmppath = _create()
    return tmppath
def download_local_file(filename, docformat=None):
    """
    Copies a local file to Invenio's temporary directory.

    @param filename: the name of the file to copy
    @type filename: string
    @param docformat: the format of the file to copy (will be found if not
                      specified)
    @type docformat: string
    @return: the path of the temporary file created
    @rtype: string
    @raise StandardError: if something went wrong
    """
    # Make sure the format is OK.
    if docformat is None:
        docformat = guess_format_from_url(filename)
    else:
        docformat = normalize_format(docformat)

    tmppath = ''
    # Now try to copy.
    try:
        path = urllib2.urlparse.urlsplit(urllib.unquote(filename))[2]
        if os.path.abspath(path) != path:
            raise StandardError, "%s is not a normalized path (would be %s)." \
                  % (path, os.path.normpath(path))
        # NOTE(review): unlike check_valid_url, this whitelist omits
        # CFG_TMPSHAREDDIR -- confirm whether that is intentional.
        for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR,
                CFG_WEBSUBMIT_STORAGEDIR]:
            if path.startswith(allowed_path):
                tmppath = safe_mkstemp(docformat)
                shutil.copy(path, tmppath)
                if os.path.getsize(tmppath) == 0:
                    os.remove(tmppath)
                    raise StandardError, "%s seems to be empty" % filename
                break
        else:
            # for/else: no allowed prefix matched the path.
            raise StandardError, "%s is not in one of the allowed paths." % path
    except Exception, e:
        raise StandardError, "Impossible to copy the local file '%s': %s" % \
              (filename, str(e))

    return tmppath
def download_external_url(url, docformat=None, progress_callback=None):
"""
Download a url (if it corresponds to a remote file) and return a
local url to it.
@param url: the URL to download
@type url: string
@param format: the format of the file (will be found if not specified)
@type format: string
@return: the path to the download local file
@rtype: string
@raise StandardError: if the download failed
"""
tmppath = None
# Make sure the format is OK.
if docformat is None:
# First try to find a known extension to the URL
docformat = decompose_file(url, skip_version=True,
only_known_extensions=True)[2]
if not docformat:
# No correct format could be found. Will try to get it from the
# HTTP message headers.
docformat = ''
else:
docformat = normalize_format(docformat)
from_file, to_file, tmppath = None, None, ''
try:
from_file = open_url(url)
except InvenioBibdocfileUnauthorizedURL, e:
raise StandardError, str(e)
except urllib2.URLError, e:
raise StandardError, 'URL could not be opened: %s' % str(e)
if not docformat:
# We could not determine the format from the URL, so let's try
# to read it from the HTTP headers.
docformat = get_format_from_http_response(from_file)
try:
tmppath = safe_mkstemp(docformat)
if progress_callback:
total_size = int(from_file.info().getheader('Content-Length').strip())
progress_size = 0
to_file = open(tmppath, 'w')
while True:
block = from_file.read(CFG_BIBDOCFILE_BLOCK_SIZE)
if not block:
break
to_file.write(block)
if progress_callback:
progress_size += CFG_BIBDOCFILE_BLOCK_SIZE
progress_callback(progress_size, CFG_BIBDOCFILE_BLOCK_SIZE,
total_size)
to_file.close()
from_file.close()
if os.path.getsize(tmppath) == 0:
raise StandardError, "%s seems to be empty" % url
except Exception, e:
# Try to close and remove the temporary file.
try:
to_file.close()
except Exception:
pass
try:
os.remove(tmppath)
except Exception:
pass
raise StandardError, "Error when downloading %s into %s: %s" % \
(url, tmppath, e)
return tmppath
def get_format_from_http_response(response):
    """
    Tries to retrieve the format of the file from the message headers of the
    HTTP response.

    @param response: the HTTP response
    @type response: file-like object (as returned by urllib.urlopen)
    @return: the format of the remote resource
    @rtype: string
    """
    def parse_content_type(text):
        # Drop any ";charset=..." parameters, keep the bare mimetype.
        return text.split(';')[0].strip()

    def parse_content_disposition(text):
        # Look for a "filename=" parameter; per RFC 6266 the value may be
        # quoted or bare.  The old slicing assumed quotes and corrupted
        # unquoted values (e.g. filename=foo.pdf -> "oo.pd").
        for item in text.split(';'):
            item = item.strip()
            if item.startswith('filename='):
                value = item[len('filename='):]
                if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
                    value = value[1:-1]
                return value

    info = response.info()
    docformat = ''
    content_disposition = info.getheader('Content-Disposition')
    if content_disposition:
        filename = parse_content_disposition(content_disposition)
        if filename:
            docformat = decompose_file(filename, only_known_extensions=False)[2]
            if docformat:
                return docformat

    content_type = info.getheader('Content-Type')
    if content_type:
        content_type = parse_content_type(content_type)
        if content_type not in ('text/plain', 'application/octet-stream'):
            ## We actually ignore these mimetypes since they are the
            ## defaults often returned by Apache in case the mimetype
            ## was not known
            ext = _mimes.guess_extension(content_type)
            if ext:
                ## Normalize some common magic mis-interpreation
                ext = {'.asc': '.txt', '.obj': '.bin'}.get(ext, ext)
                docformat = normalize_format(ext)
    return docformat
def download_url(url, docformat=None):
    """
    Download a url (if it corresponds to a remote file) and return a
    local url to it.
    """
    # Both helpers raise StandardError on failure; it propagates as-is
    # (the original's "except StandardError: raise" was a no-op re-raise).
    if is_url_a_local_file(url):
        return download_local_file(url, docformat=docformat)
    return download_external_url(url, docformat=docformat)
class MoreInfo(object):
    """This class represents a generic MoreInfo dictionary.
    MoreInfo object can be attached to bibdoc, bibversion, format or BibRelation.
    The entity where a particular MoreInfo object is attached has to be specified using the
    constructor parameters.

    This class is a thin wrapper around the database table.
    """
    def __init__(self, docid = None, version = None, docformat = None,
                 relation = None, cache_only = False, cache_reads = True, initial_data = None):
        """
        @param cache_only Determines if MoreInfo object should be created in
                          memory only or reflected in the database
        @type cache_only boolean

        @param cache_reads Determines if reads should be executed on the
                           in-memory cache or should be redirected to the
                           database. If this is true, cache can be entirely
                           regenerated from the database only upon an explicit
                           request. If the value is not present in the cache,
                           the database is queried
        @type cache_reads boolean

        @param initial_data Allows to specify initial content of the cache.
                            This parameter is useful when we create an in-memory
                            instance from serialised value
        @type initial_data string
        """
        self.docid = docid
        self.version = version
        self.format = docformat
        self.relation = relation
        self.cache_only = cache_only

        if initial_data != None:
            self.cache = initial_data
            # NOTE(review): the dirty map is seeded with the data dict
            # itself, so every initial key counts as dirty (values of the
            # dirty map are never inspected, only its keys) -- confirm
            # this aliasing is intended.
            self.dirty = initial_data
            if not self.cache_only:
                self._flush_cache() #inserts new entries
        else:
            self.cache = {}
            self.dirty = {}

        self.cache_reads = cache_reads

        if not self.cache_only:
            self.populate_from_database()

    @staticmethod
    def create_from_serialised(ser_str, docid = None, version = None, docformat = None,
                               relation = None, cache_only = False, cache_reads = True):
        """Creates an instance of MoreInfo
        using serialised data as the cache content"""
        data = cPickle.loads(base64.b64decode(ser_str))
        return MoreInfo(docid = docid, version = version, docformat = docformat,
                        relation = relation, cache_only = cache_only,
                        cache_reads = cache_reads, initial_data = data);

    def serialise_cache(self):
        """Returns a serialised representation of the cache"""
        return base64.b64encode(cPickle.dumps(self.get_cache()))

    def populate_from_database(self):
        """Retrieves all values of MoreInfo and places them in the cache"""
        where_str, where_args = self._generate_where_query_args()
        query_str = "SELECT namespace, data_key, data_value FROM bibdocmoreinfo WHERE %s" % (where_str, )
        res = run_sql(query_str, where_args)
        if res:
            for row in res:
                namespace, data_key, data_value_ser = row
                # Values are stored pickled in the data_value column.
                data_value = cPickle.loads(data_value_ser)
                if not namespace in self.cache:
                    self.cache[namespace] = {}
                self.cache[namespace][data_key] = data_value

    def _mark_dirty(self, namespace, data_key):
        """Marks a data key dirty - that should be saved into the database"""
        if not namespace in self.dirty:
            self.dirty[namespace] = {}
        self.dirty[namespace][data_key] = True

    def _database_get_distinct_string_list(self, column, namespace = None):
        """A private method reading an unique list of strings from the
        moreinfo database table"""
        where_str, where_args = self._generate_where_query_args(
            namespace = namespace)
        query_str = "SELECT DISTINCT %s FROM bibdocmoreinfo WHERE %s" % \
            ( column, where_str, )
        if DBG_LOG_QUERIES:
            from invenio.bibtask import write_message
            write_message("Executing query: " + query_str + " ARGS: " + repr(where_args))
            print "Executing query: " + query_str + " ARGS: " + repr(where_args)
        res = run_sql(query_str, where_args)
        return (res and [x[0] for x in res]) or [] # after migrating to python 2.6, can be rewritten using x if y else z syntax: return [x[0] for x in res] if res else []

    def _database_get_namespaces(self):
        """Read the database to discover namespaces declared in a given MoreInfo"""
        return self._database_get_distinct_string_list("namespace")

    def _database_get_keys(self, namespace):
        """Returns all keys assigned in a given namespace of a MoreInfo instance"""
        return self._database_get_distinct_string_list("data_key", namespace=namespace)

    def _database_contains_key(self, namespace, key):
        """Return True when the (namespace, key) pair exists in the database."""
        return self._database_read_value(namespace, key) != None

    def _database_save_value(self, namespace, key, value):
        """Write changes into the database"""
        #TODO: this should happen within one transaction
        serialised_val = cPickle.dumps(value)
        # on duplicate key will not work here as miltiple null values are permitted by the index
        if not self._database_contains_key(namespace, key):
            #insert new value
            query_parts = []
            query_args = []
            to_process = [(self.docid, "id_bibdoc"), (self.version, "version"),
                          (self.format, "format"), (self.relation, "id_rel"),
                          (str(namespace), "namespace"), (str(key), "data_key"),
                          (str(serialised_val), "data_value")]
            # _val_or_null emits either a %s placeholder or a literal NULL
            # for each column, filling query_parts/query_args accordingly.
            for entry in to_process:
                _val_or_null(entry[0], q_str = query_parts, q_args = query_args)
            columns_str = ", ".join(map(lambda x: x[1], to_process))
            values_str = ", ".join(query_parts)
            query_str = "INSERT INTO bibdocmoreinfo (%s) VALUES(%s)" % \
                (columns_str, values_str)
            if DBG_LOG_QUERIES:
                from invenio.bibtask import write_message
                write_message("Executing query: " + query_str + " ARGS: " + repr(query_args))
                print "Executing query: " + query_str + " ARGS: " + repr(query_args)
            run_sql(query_str, query_args)
        else:
            #Update existing value
            where_str, where_args = self._generate_where_query_args(namespace, key)
            query_str = "UPDATE bibdocmoreinfo SET data_value=%s WHERE " + where_str
            query_args = [str(serialised_val)] + where_args
            if DBG_LOG_QUERIES:
                from invenio.bibtask import write_message
                write_message("Executing query: " + query_str + " ARGS: " + repr(query_args))
                print "Executing query: " + query_str + " ARGS: " + repr(query_args)
            run_sql(query_str, query_args )

    def _database_read_value(self, namespace, key):
        """Reads a value directly from the database
        @param namespace - namespace of the data to be read
        @param key - key of the data to be read
        """
        where_str, where_args = self._generate_where_query_args(namespace = namespace, data_key = key)
        query_str = "SELECT data_value FROM bibdocmoreinfo WHERE " + where_str

        res = run_sql(query_str, where_args)

        if DBG_LOG_QUERIES:
            from invenio.bibtask import write_message
            write_message("Executing query: " + query_str + " ARGS: " + repr(where_args) + "WITH THE RESULT: " + str(res))
            s_ = ""
            if res:
                s_ = cPickle.loads(res[0][0])
            print "Executing query: " + query_str + " ARGS: " + repr(where_args) + " WITH THE RESULT: " + str(s_)

        if res and res[0][0]:
            try:
                # Stored values are pickled; unpickle before returning.
                return cPickle.loads(res[0][0])
            except:
                raise Exception("Error when deserialising value for %s key=%s retrieved value=%s" % (repr(self), str(key), str(res[0][0])))
        return None

    def _database_remove_value(self, namespace, key):
        """Removes an entry directly in the database"""
        where_str, where_args = self._generate_where_query_args(namespace = namespace, data_key = key)
        query_str = "DELETE FROM bibdocmoreinfo WHERE " + where_str
        if DBG_LOG_QUERIES:
            from invenio.bibtask import write_message
            write_message("Executing query: " + query_str + " ARGS: " + repr(where_args))
            print "Executing query: " + query_str + " ARGS: " + repr(where_args)
        run_sql(query_str, where_args)

        return None

    def _flush_cache(self):
        """Writes all the dirty cache entries into the database"""
        for namespace in self.dirty:
            for data_key in self.dirty[namespace]:
                if namespace in self.cache and data_key in self.cache[namespace]\
                        and not self.cache[namespace][data_key] is None:
                    self._database_save_value(namespace, data_key, self.cache[namespace][data_key])
                else:
                    # This might happen if a value has been removed from the cache
                    self._database_remove_value(namespace, data_key)
        self.dirty = {}

    def _generate_where_query_args(self, namespace = None, data_key = None):
        """Private method generating WHERE clause of SQL statements"""
        ns = []
        if namespace != None:
            ns = [(namespace, "namespace")]
        dk = []
        if data_key != None:
            dk = [(data_key, "data_key")]
        to_process = [(self.docid, "id_bibdoc"), (self.version, "version"),
                      (self.format, "format"), (self.relation, "id_rel")] + \
                      ns + dk

        return _sql_generate_conjunctive_where(to_process)

    def set_data(self, namespace, key, value):
        """setting data directly in the database dictionary"""
        if not namespace in self.cache:
            self.cache[namespace] = {}
        self.cache[namespace][key] = value
        self._mark_dirty(namespace, key)
        if not self.cache_only:
            self._flush_cache()

    def get_data(self, namespace, key):
        """retrieving data from the database"""
        if self.cache_reads or self.cache_only:
            if namespace in self.cache and key in self.cache[namespace]:
                return self.cache[namespace][key]

        if not self.cache_only:
            # we have a permission to read from the database
            value = self._database_read_value(namespace, key)
            if value:
                # Backfill the cache with the value just read.
                if not namespace in self.cache:
                    self.cache[namespace] = {}
                self.cache[namespace][key] = value
            return value
        return None

    def del_key(self, namespace, key):
        """Remove a key from the cache (and, on flush, from the database)."""
        if not namespace in self.cache:
            return None

        del self.cache[namespace][key]
        # The dirty mark with no cached value makes _flush_cache DELETE it.
        self._mark_dirty(namespace, key)
        if not self.cache_only:
            self._flush_cache()

    def contains_key(self, namespace, key):
        """Return True when (namespace, key) resolves to a non-None value."""
        return self.get_data(namespace, key) != None

    # the dictionary interface -> updating the default namespace
    def __setitem__(self, key, value):
        self.set_data("", key, value) #the default value

    def __getitem__(self, key):
        return self.get_data("", key)

    def __delitem__(self, key):
        self.del_key("", key)

    def __contains__(self, key):
        return self.contains_key("", key)

    def __repr__(self):
        return "MoreInfo(docid=%s, version=%s, docformat=%s, relation=%s)" % \
            (self.docid, self.version, self.format, self.relation)

    def delete(self):
        """Remove all entries associated with this MoreInfo"""
        self.cache = {}
        if not self.cache_only:
            where_str, query_args = self._generate_where_query_args()
            query_str = "DELETE FROM bibdocmoreinfo WHERE %s" % (where_str, )
            if DBG_LOG_QUERIES:
                from invenio.bibtask import write_message
                write_message("Executing query: " + query_str + " ARGS: " + repr(query_args))
                print "Executing query: " + query_str + " ARGS: " + repr(query_args)
            run_sql(query_str, query_args)

    def get_cache(self):
        """Returns the content of the cache
        @return The content of the MoreInfo cache
        @rtype dictionary {namespace: {key1: value1, ... }, namespace2: {}}
        """
        return self.cache

    def get_namespaces(self):
        """Returns a list of namespaces present in the MoreInfo structure.
        If the object is permitted access to the database, the data should
        be always read from there. Unlike when reading a particular value,
        we can not check if value is missing in the cache
        """
        if self.cache_only and self.cache_reads:
            return self.cache.keys()
        return self._database_get_namespaces()

    def get_keys(self, namespace):
        """Returns a list of keys present in a given namespace"""
        if self.cache_only and self.cache_reads:
            res = []
            if namespace in self.cache:
                res = self.cache[namespace].keys()
            return res
        else:
            return self._database_get_keys(namespace)

    def flush(self):
        """Flush the content into the database"""
        self._flush_cache()
class BibDocMoreInfo(MoreInfo):
"""
This class wraps contextual information of the documents, such as the
- comments
- descriptions
- flags.
Such information is kept separately per every format/version instance of
the corresponding document and is serialized in the database, ready
to be retrieved (but not searched).
@param docid: the document identifier.
@type docid: integer
@param more_info: a serialized version of an already existing more_info
object. If not specified this information will be readed from the
database, and othewise an empty dictionary will be allocated.
@raise ValueError: if docid is not a positive integer.
@ivar docid: the document identifier as passed to the constructor.
@type docid: integer
@ivar more_info: the more_info dictionary that will hold all the
additional document information.
@type more_info: dict of dict of dict
@note: in general this class is never instanciated in client code and
never used outside bibdocfile module.
@note: this class will be extended in the future to hold all the new auxiliary
information about a document.
"""
def __init__(self, docid, cache_only = False, initial_data = None):
if not (type(docid) in (long, int) and docid > 0):
raise ValueError("docid is not a positive integer, but %s." % docid)
MoreInfo.__init__(self, docid, cache_only = cache_only, initial_data = initial_data)
if 'descriptions' not in self:
self['descriptions'] = {}
if 'comments' not in self:
self['comments'] = {}
if 'flags' not in self:
self['flags'] = {}
if DBG_LOG_QUERIES:
from invenio.bibtask import write_message
write_message("Creating BibDocMoreInfo :" + repr(self["comments"]))
print "Creating BibdocMoreInfo :" + repr(self["comments"])
def __repr__(self):
"""
@return: the canonical string representation of the C{BibDocMoreInfo}.
@rtype: string
"""
return 'BibDocMoreInfo(%i, %s)' % (self.docid, repr(cPickle.dumps(self)))
def set_flag(self, flagname, docformat, version):
"""
Sets a flag.
@param flagname: the flag to set (see
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}).
@type flagname: string
@param format: the format for which the flag should set.
@type format: string
@param version: the version for which the flag should set:
@type version: integer
@raise ValueError: if the flag is not in
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}
"""
if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
flags = self['flags']
if not flagname in flags:
flags[flagname] = {}
if not version in flags[flagname]:
flags[flagname][version] = {}
if not docformat in flags[flagname][version]:
flags[flagname][version][docformat] = {}
flags[flagname][version][docformat] = True
self['flags'] = flags
else:
raise ValueError, "%s is not in %s" % \
(flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS)
def get_comment(self, docformat, version):
"""
Returns the specified comment.
@param format: the format for which the comment should be
retrieved.
@type format: string
@param version: the version for which the comment should be
retrieved.
@type version: integer
@return: the specified comment.
@rtype: string
"""
try:
assert(type(version) is int)
docformat = normalize_format(docformat)
return self['comments'].get(version, {}).get(docformat)
except:
register_exception()
raise
def get_description(self, docformat, version):
"""
Returns the specified description.
@param format: the format for which the description should be
retrieved.
@type format: string
@param version: the version for which the description should be
retrieved.
@type version: integer
@return: the specified description.
@rtype: string
"""
try:
assert(type(version) is int)
docformat = normalize_format(docformat)
return self['descriptions'].get(version, {}).get(docformat)
except:
register_exception()
raise
def has_flag(self, flagname, docformat, version):
"""
Return True if the corresponding has been set.
@param flagname: the name of the flag (see
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}).
@type flagname: string
@param format: the format for which the flag should be checked.
@type format: string
@param version: the version for which the flag should be checked.
@type version: integer
@return: True if the flag is set for the given format/version.
@rtype: bool
@raise ValueError: if the flagname is not in
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}
"""
if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
return self['flags'].get(flagname, {}).get(version, {}).get(docformat, False)
else:
raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS)
def get_flags(self, docformat, version):
"""
Return the list of all the enabled flags.
@param format: the format for which the list should be returned.
@type format: string
@param version: the version for which the list should be returned.
@type version: integer
@return: the list of enabled flags (from
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}).
@rtype: list of string
"""
return [flag for flag in self['flags'] if docformat in self['flags'][flag].get(version, {})]
def set_comment(self, comment, docformat, version):
"""
Set a comment.
@param comment: the comment to be set.
@type comment: string
@param format: the format for which the comment should be set.
@type format: string
@param version: the version for which the comment should be set:
@type version: integer
"""
try:
assert(type(version) is int and version > 0)
docformat = normalize_format(docformat)
if comment == KEEP_OLD_VALUE:
comment = self.get_comment(docformat, version) or self.get_comment(docformat, version - 1)
if not comment:
self.unset_comment(docformat, version)
return
if not version in self['comments']:
comments = self['comments']
comments[version] = {}
self['comments'] = comments
comments = self['comments']
comments[version][docformat] = comment
self['comments'] = comments
except:
register_exception()
raise
def set_description(self, description, docformat, version):
"""
Set a description.
@param description: the description to be set.
@type description: string
@param format: the format for which the description should be set.
@type format: string
@param version: the version for which the description should be set:
@type version: integer
"""
try:
assert(type(version) is int and version > 0)
docformat = normalize_format(docformat)
if description == KEEP_OLD_VALUE:
description = self.get_description(docformat, version) or self.get_description(docformat, version - 1)
if not description:
self.unset_description(docformat, version)
return
descriptions = self['descriptions']
if not version in descriptions:
descriptions[version] = {}
descriptions[version][docformat] = description
self.set_data("", 'descriptions', descriptions)
except:
register_exception()
raise
def unset_comment(self, docformat, version):
"""
Unset a comment.
@param format: the format for which the comment should be unset.
@type format: string
@param version: the version for which the comment should be unset:
@type version: integer
"""
try:
assert(type(version) is int and version > 0)
comments = self['comments']
del comments[version][docformat]
self['comments'] = comments
except KeyError:
pass
except:
register_exception()
raise
def unset_description(self, docformat, version):
"""
Unset a description.
@param format: the format for which the description should be unset.
@type format: string
@param version: the version for which the description should be unset:
@type version: integer
"""
try:
assert(type(version) is int and version > 0)
descriptions = self['descriptions']
del descriptions[version][docformat]
self['descriptions'] = descriptions
except KeyError:
pass
except:
register_exception()
raise
def unset_flag(self, flagname, docformat, version):
"""
Unset a flag.
@param flagname: the flag to be unset (see
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}).
@type flagname: string
@param format: the format for which the flag should be unset.
@type format: string
@param version: the version for which the flag should be unset:
@type version: integer
@raise ValueError: if the flag is not in
L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}
"""
if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
try:
flags = self['flags']
del flags[flagname][version][docformat]
self['flags'] = flags
except KeyError:
pass
else:
raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS)
_bib_relation__any_value = -1
class BibRelation(object):
    """
    A representation of a relation between documents or their particular
    versions/formats, backed by the bibdoc_bibdoc table.
    """
    def __init__(self, rel_type = None,
                 bibdoc1_id = None, bibdoc2_id = None,
                 bibdoc1_ver = None, bibdoc2_ver = None,
                 bibdoc1_fmt = None, bibdoc2_fmt = None,
                 rel_id = None):
        """
        The constructor of the class representing a relation between two
        documents.

        When rel_id is provided, the remaining attributes are loaded from
        the database; otherwise the identifier is looked up from the
        provided data. If a version of whichever document is not
        specified, the resulting object describes a relation of all
        versions of the given BibDoc.

        @param rel_type: the type of the relation
        @type rel_type: string
        @param bibdoc1_id: identifier of the first document
        @type bibdoc1_id: int
        @param bibdoc2_id: identifier of the second document
        @type bibdoc2_id: int
        @param bibdoc1_ver: version of the first document
        @type bibdoc1_ver: int
        @param bibdoc2_ver: version of the second document
        @type bibdoc2_ver: int
        @param bibdoc1_fmt: format of the first document
        @type bibdoc1_fmt: string
        @param bibdoc2_fmt: format of the second document
        @type bibdoc2_fmt: string
        @param rel_id: identifier of an already existing relation
        @type rel_id: unsigned int
        """
        self.id = rel_id
        self.bibdoc1_id = bibdoc1_id
        self.bibdoc2_id = bibdoc2_id
        self.bibdoc1_ver = bibdoc1_ver
        self.bibdoc2_ver = bibdoc2_ver
        self.bibdoc1_fmt = bibdoc1_fmt
        self.bibdoc2_fmt = bibdoc2_fmt
        self.rel_type = rel_type
        if rel_id is None:
            # only the relation data is known: resolve the identifier
            self._fill_id_from_data()
        else:
            # only the identifier is known: load the relation data
            self._fill_data_from_id()
        self.more_info = MoreInfo(relation = self.id)

    def _fill_data_from_id(self):
        """Fill all the relation data from the relation identifier."""
        query = "SELECT id_bibdoc1, version1, format1, id_bibdoc2, version2, format2, rel_type FROM bibdoc_bibdoc WHERE id=%s"
        res = run_sql(query, (str(self.id), ))
        # BUGFIX: the previous code tested "res != None and res[0] != None",
        # which still indexed res[0] on an *empty* result tuple and raised
        # IndexError for a non-existing relation identifier.
        if res:
            row = res[0]
            self.bibdoc1_id = row[0]
            self.bibdoc1_ver = row[1]
            self.bibdoc1_fmt = row[2]
            self.bibdoc2_id = row[3]
            self.bibdoc2_ver = row[4]
            self.bibdoc2_fmt = row[5]
            self.rel_type = row[6]

    def _fill_id_from_data(self):
        """Fill the relation identifier based on the data provided."""
        where_str, where_args = self._get_where_clauses()
        query = "SELECT id FROM bibdoc_bibdoc WHERE %s" % (where_str, )
        res = run_sql(query, where_args)
        if res and res[0][0]:
            self.id = int(res[0][0])

    def _get_value_column_mapping(self):
        """
        Return a list of (value, column name) tuples mapping every
        relation attribute to the bibdoc_bibdoc column it is stored in.
        """
        return [(self.rel_type, "rel_type"), (self.bibdoc1_id, "id_bibdoc1"),
                (self.bibdoc1_ver, "version1"),
                (self.bibdoc1_fmt, "format1"),
                (self.bibdoc2_id, "id_bibdoc2"),
                (self.bibdoc2_ver, "version2"),
                (self.bibdoc2_fmt, "format2")]

    def _get_where_clauses(self):
        """Private function returning part of the SQL statement identifying
        the current relation.

        @return: (where string, arguments)
        @rtype: tuple
        """
        return _sql_generate_conjunctive_where(self._get_value_column_mapping())

    @staticmethod
    def create(bibdoc1_id = None, bibdoc1_ver = None,
               bibdoc1_fmt = None, bibdoc2_id = None,
               bibdoc2_ver = None, bibdoc2_fmt = None,
               rel_type = ""):
        """
        Create a relation and return its instance.

        Omitting an argument means that the relation concerns any value of
        that parameter (stored as NULL). If an identical relation already
        exists, it is returned instead of creating a duplicate.
        """
        # check if there is already an entry corresponding to the parameters
        existing = BibRelation.get_relations(rel_type = rel_type,
                                             bibdoc1_id = bibdoc1_id,
                                             bibdoc2_id = bibdoc2_id,
                                             bibdoc1_ver = bibdoc1_ver,
                                             bibdoc2_ver = bibdoc2_ver,
                                             bibdoc1_fmt = bibdoc1_fmt,
                                             bibdoc2_fmt = bibdoc2_fmt)
        if len(existing) > 0:
            return existing[0]
        # build the insert query and execute it
        to_process = [(rel_type, "rel_type"), (bibdoc1_id, "id_bibdoc1"),
                      (bibdoc1_ver, "version1"), (bibdoc1_fmt, "format1"),
                      (bibdoc2_id, "id_bibdoc2"), (bibdoc2_ver, "version2"),
                      (bibdoc2_fmt, "format2")]
        values_list = []
        args_list = []
        columns_list = []
        for value, column in to_process:
            columns_list.append(column)
            if value is None:
                # unspecified values are stored as SQL NULL
                values_list.append("NULL")
            else:
                values_list.append("%s")
                args_list.append(value)
        query = "INSERT INTO bibdoc_bibdoc (%s) VALUES (%s)" % (", ".join(columns_list), ", ".join(values_list))
        rel_id = run_sql(query, args_list)
        return BibRelation(rel_id = rel_id)

    def delete(self):
        """Remove the relation between objects from the database.

        Executing the flush function on the associated MoreInfo will
        restore the relation's additional data.
        """
        where_str, where_args = self._get_where_clauses()
        run_sql("DELETE FROM bibdoc_bibdoc WHERE %s" % (where_str,), where_args) # kwalitee: disable=sql
        # removing associated MoreInfo
        self.more_info.delete()

    def get_more_info(self):
        """Return the MoreInfo object attached to this relation."""
        return self.more_info

    @staticmethod
    def get_relations(rel_type = _bib_relation__any_value,
                      bibdoc1_id = _bib_relation__any_value,
                      bibdoc2_id = _bib_relation__any_value,
                      bibdoc1_ver = _bib_relation__any_value,
                      bibdoc2_ver = _bib_relation__any_value,
                      bibdoc1_fmt = _bib_relation__any_value,
                      bibdoc2_fmt = _bib_relation__any_value):
        """Retrieve the list of relations satisfying the conditions.

        If a parameter is specified, its value has to match exactly
        (None matches a NULL column). If a parameter is omitted, any of
        its values will be accepted."""
        to_process = [(rel_type, "rel_type"), (bibdoc1_id, "id_bibdoc1"),
                      (bibdoc1_ver, "version1"), (bibdoc1_fmt, "format1"),
                      (bibdoc2_id, "id_bibdoc2"), (bibdoc2_ver, "version2"),
                      (bibdoc2_fmt, "format2")]
        where_str, where_args = _sql_generate_conjunctive_where(
            [entry for entry in to_process
             if entry[0] != _bib_relation__any_value])
        if where_str:
            where_str = "WHERE " + where_str # in case of nonempty where, we need a where clause
        query_str = "SELECT id FROM bibdoc_bibdoc %s" % (where_str, )
        try:
            res = run_sql(query_str, where_args)
        except:
            # re-raise with the full query attached for easier debugging
            raise Exception(query_str + " " + str(where_args))
        results = []
        if res is not None:
            for res_row in res:
                results.append(BibRelation(rel_id=res_row[0]))
        return results

    # Access to MoreInfo
    def set_data(self, category, key, value):
        """Assign additional information to this relation."""
        self.more_info.set_data(category, key, value)

    def get_data(self, category, key):
        """Read additional information assigned to this relation."""
        return self.more_info.get_data(category, key)

    # The dictionary interface allowing to set data bypassing the namespaces
    def __setitem__(self, key, value):
        self.more_info[key] = value

    def __getitem__(self, key):
        return self.more_info[key]

    def __contains__(self, key):
        return self.more_info.__contains__(key)

    def __repr__(self):
        return "BibRelation(id_bibdoc1 = %s, version1 = %s, format1 = %s, id_bibdoc2 = %s, version2 = %s, format2 = %s, rel_type = %s)" % \
            (self.bibdoc1_id, self.bibdoc1_ver, self.bibdoc1_fmt,
             self.bibdoc2_id, self.bibdoc2_ver, self.bibdoc2_fmt,
             self.rel_type)
def readfile(filename):
    """
    Read a file.

    @param filename: the name of the file to be read.
    @type filename: string
    @return: the text contained in the file.
    @rtype: string
    @note: Returns empty string in case of any error.
    @note: this function is useful for quick implementation of websubmit
        functions.
    """
    try:
        # Close the handle explicitly: the original relied on the garbage
        # collector, which leaks descriptors on non-refcounting runtimes.
        fh = open(filename)
        try:
            return fh.read()
        finally:
            fh.close()
    except Exception:
        return ''
class HeadRequest(urllib2.Request):
    """
    A request object to perform a HEAD request.
    """
    def get_method(self):
        # urllib2 picks the HTTP verb via get_method(); overriding it is
        # the standard way to issue a HEAD request with urllib2.
        return 'HEAD'
def read_cookie(cookiefile):
    """
    Parse a cookie file and return a string as needed for the urllib2
    headers.  The file should respect the Netscape cookie specifications.

    @param cookiefile: path of the cookie file to parse.
    @type cookiefile: string
    @return: 'name=value; ' pairs concatenated together (possibly empty).
    @rtype: string
    """
    cookie_data = ''
    cfile = open(cookiefile, 'r')
    # close the handle even if parsing raises (the original leaked it)
    try:
        for line in cfile:
            tokens = line.split('\t')
            if len(tokens) == 7: # we are on a cookie line (7 TAB-separated fields)
                cookie_data += '%s=%s; ' % (tokens[5], tokens[6].replace('\n', ''))
    finally:
        cfile.close()
    return cookie_data
def open_url(url, headers=None, head_request=False):
    """
    Opens a URL. If headers are passed as argument, no check is performed and
    the URL will be opened. Otherwise checks if the URL is present in
    CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS and uses the headers specified in
    the config variable.

    @param url: the URL to open
    @type url: string
    @param headers: the headers to use
    @type headers: dictionary
    @param head_request: if True, perform a HEAD request, otherwise a POST
        request
    @type head_request: boolean
    @return: a file-like object as returned by urllib2.urlopen.
    @raise InvenioBibdocfileUnauthorizedURL: if no headers were supplied
        and the URL matches no entry of the configured whitelist.
    """
    headers_to_use = None
    if headers is None:
        # look the URL up in the configured whitelist; the first matching
        # regex wins and its associated headers are used
        for regex, headers in _CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS:
            if regex.match(url) is not None:
                headers_to_use = headers
                break
        if headers_to_use is None:
            # URL is not allowed.
            raise InvenioBibdocfileUnauthorizedURL, "%s is not an authorized " \
                  "external URL." % url
    else:
        headers_to_use = headers
    request_obj = head_request and HeadRequest or urllib2.Request
    request = request_obj(url)
    request.add_header('User-Agent', make_user_agent_string('bibdocfile'))
    for key, value in headers_to_use.items():
        try:
            # header values may be dynamic: a {'fnc': <name>, 'args': {...}}
            # dict is resolved by calling the named module-level function
            value = globals()[value['fnc']](**value['args'])
        except (KeyError, TypeError):
            # plain static header value: use it as-is
            pass
        request.add_header(key, value)
    return urllib2.urlopen(request)
def update_modification_date_of_file(filepath, modification_date):
    """Set the modification time of a file to the given date.

    @param filepath: the full path of the file that needs to be updated
    @type filepath: string
    @param modification_date: the new modification date and time
    @type modification_date: datetime.datetime object
    """
    try:
        # translate the datetime into seconds since the epoch
        mtime_seconds = time.mktime(modification_date.timetuple())
    except (AttributeError, TypeError):
        # not a datetime-like value: leave the file untouched
        mtime_seconds = 0
    if not mtime_seconds:
        return
    # keep the current access time, change only the modification time
    file_stats = os.stat(filepath)
    os.utime(filepath, (file_stats.st_atime, mtime_seconds))
diff --git a/invenio/legacy/bibdocfile/cli.py b/invenio/legacy/bibdocfile/cli.py
index a280a89ff..08348dbd6 100644
--- a/invenio/legacy/bibdocfile/cli.py
+++ b/invenio/legacy/bibdocfile/cli.py
@@ -1,1259 +1,1259 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibDocAdmin CLI administration tool
"""
__revision__ = "$Id$"
import sys
import re
import os
import time
import fnmatch
import time
from datetime import datetime
from logging import getLogger, debug, DEBUG
from optparse import OptionParser, OptionGroup, OptionValueError
from tempfile import mkstemp
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_SITE_URL, CFG_BIBDOCFILE_FILEDIR, \
CFG_SITE_RECORD, CFG_TMPSHAREDDIR
from invenio.bibdocfile import BibRecDocs, BibDoc, InvenioBibDocFileError, \
nice_size, check_valid_url, clean_url, get_docname_from_url, \
guess_format_from_url, KEEP_OLD_VALUE, decompose_bibdocfile_fullpath, \
bibdocfile_url_to_bibdoc, decompose_bibdocfile_url, CFG_BIBDOCFILE_AVAILABLE_FLAGS
from invenio.intbitset import intbitset
from invenio.search_engine import perform_request_search
from invenio.utils.text import wrap_text_in_a_box, wait_for_user
from invenio.dbquery import run_sql
from invenio.bibtask import task_low_level_submission
from invenio.utils.text import encode_for_xml
from invenio.websubmit_file_converter import can_perform_ocr
def _xml_mksubfield(key, subfield, fft):
    """Return a MARC <subfield> XML line for fft[key], or '' when the key
    is missing or None.

    The original used the fragile "cond and a or b" pseudo-ternary, which
    silently breaks whenever the true-branch value is falsy.
    """
    value = fft.get(key, None)
    if value is None:
        return ''
    return '\t\t<subfield code="%s">%s</subfield>\n' % (subfield, encode_for_xml(str(value)))
def _xml_mksubfields(key, subfield, fft):
    """Return one MARC <subfield> XML line per value stored in fft[key]
    (an empty string when the key is absent)."""
    lines = ['\t\t<subfield code="%s">%s</subfield>\n'
             % (subfield, encode_for_xml(str(value)))
             for value in fft.get(key, [])]
    return "".join(lines)
def _xml_fft_creator(fft):
    """Transform an fft dictionary (made by keys url, docname, format,
    new_docname, comment, description, restriction, doctype) into an FFT
    datafield XML string."""
    debug('Input FFT structure: %s' % fft)
    pieces = ['\t<datafield tag ="FFT" ind1=" " ind2=" ">\n']
    # single-valued subfields, in their fixed output order
    for key, code in (('url', 'a'), ('docname', 'n'), ('format', 'f'),
                      ('new_docname', 'm'), ('doctype', 't'),
                      ('description', 'd'), ('comment', 'z'),
                      ('restriction', 'r')):
        pieces.append(_xml_mksubfield(key, code, fft))
    # multi-valued options subfield
    pieces.append(_xml_mksubfields('options', 'o', fft))
    pieces.append(_xml_mksubfield('version', 'v', fft))
    pieces.append('\t</datafield>\n')
    out = "".join(pieces)
    debug('FFT created: %s' % out)
    return out
def ffts_to_xml(ffts_dict):
    """Transform a dictionary recid -> ffts, where ffts is a list of fft
    dictionaries, into MARC XML."""
    debug('Input FFTs dictionary: %s' % ffts_dict)
    out = ''
    for recid in sorted(ffts_dict.keys()):
        ffts = ffts_dict[recid]
        if not ffts:
            # records without any fft produce no output at all
            continue
        out += '<record>\n'
        out += '\t<controlfield tag="001">%i</controlfield>\n' % recid
        for fft in ffts:
            out += _xml_fft_creator(fft)
        out += '</record>\n'
    debug('MARC to Upload: %s' % out)
    return out
_shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])")
def _parse_datetime(var):
"""Returns a date string according to the format string.
It can handle normal date strings and shifts with respect
to now."""
if not var:
return None
date = time.time()
factors = {"d":24*3600, "h":3600, "m":60, "s":1}
m = _shift_re.match(var)
if m:
sign = m.groups()[0] == "-" and -1 or 1
factor = factors[m.groups()[2]]
value = float(m.groups()[1])
return datetime.fromtimestamp(date + sign * factor * value)
else:
return datetime(*(time.strptime(var, "%Y-%m-%d %H:%M:%S")[0:6]))
# The code above is Python 2.4 compatible. The following is the 2.5
# version.
# return datetime.strptime(var, "%Y-%m-%d %H:%M:%S")
def _parse_date_range(var):
    """Return the two dates contained in *var* as a (low, high) tuple.

    A single date yields (low, None); anything else yields (None, None).
    """
    limits = var.split(",")
    if len(limits) == 1:
        return _parse_datetime(limits[0]), None
    if len(limits) == 2:
        return _parse_datetime(limits[0]), _parse_datetime(limits[1])
    return None, None
def cli_quick_match_all_recids(options):
    """Return quickly an approximate but (by excess) list of good recids.

    The result may contain false positives; the slow per-record matcher
    refines it afterwards.

    @param options: the parsed command line options.
    @return: the candidate record identifiers.
    @rtype: intbitset
    """
    url = getattr(options, 'url', None)
    if url:
        return intbitset([decompose_bibdocfile_url(url)[0]])
    path = getattr(options, 'path', None)
    if path:
        docid = decompose_bibdocfile_fullpath(path)["doc_id"]
        bd = BibDoc(docid)
        ids = []
        for rec_link in bd.bibrec_links:
            ids.append(rec_link["recid"])
        return intbitset(ids)
    docids = getattr(options, 'docids', None)
    if docids:
        ids = []
        for docid in docids:
            bd = BibDoc(docid)
            for rec_link in bd.bibrec_links:
                ids.append(rec_link["recid"])
        return intbitset(ids)
    collection = getattr(options, 'collection', None)
    pattern = getattr(options, 'pattern', None)
    recids = getattr(options, 'recids', None)
    md_rec = getattr(options, 'md_rec', None)
    cd_rec = getattr(options, 'cd_rec', None)
    tmp_date_query = []
    tmp_date_params = []
    if recids is None:
        debug('Initially considering all the recids')
        recids = intbitset(run_sql('SELECT id FROM bibrec'))
        if not recids:
            print >> sys.stderr, 'WARNING: No record in the database'
    if md_rec[0] is not None:
        tmp_date_query.append('modification_date>=%s')
        tmp_date_params.append(md_rec[0])
    if md_rec[1] is not None:
        tmp_date_query.append('modification_date<=%s')
        tmp_date_params.append(md_rec[1])
    if cd_rec[0] is not None:
        tmp_date_query.append('creation_date>=%s')
        tmp_date_params.append(cd_rec[0])
    if cd_rec[1] is not None:
        tmp_date_query.append('creation_date<=%s')
        tmp_date_params.append(cd_rec[1])
    if tmp_date_query:
        tmp_date_query = ' AND '.join(tmp_date_query)
        tmp_date_params = tuple(tmp_date_params)
        query = 'SELECT id FROM bibrec WHERE %s' % tmp_date_query
        debug('Query: %s, param: %s' % (query, tmp_date_params))
        # BUGFIX: the query used to be interpolated a second time
        # ("query % tmp_date_query"), which produced malformed SQL with a
        # single date constraint and a TypeError with several, because the
        # conditions are already part of 'query'. Only the parameters must
        # be passed to run_sql.
        recids &= intbitset(run_sql(query, tmp_date_params))
        debug('After applying dates we obtain recids: %s' % recids)
        if not recids:
            print >> sys.stderr, 'WARNING: Time constraints for records are too strict'
    if collection or pattern:
        recids &= intbitset(perform_request_search(cc=collection or '', p=pattern or ''))
        debug('After applyings pattern and collection we obtain recids: %s' % recids)
    debug('Quick recids: %s' % recids)
    return recids
def cli_quick_match_all_docids(options, recids=None):
    """Return quickly an approximate but (by excess) list of good docids.

    @param options: the parsed command line options.
    @param recids: optional precomputed set of candidate recids.
    @rtype: intbitset
    """
    url = getattr(options, 'url', None)
    if url:
        return intbitset([bibdocfile_url_to_bibdoc(url).get_id()])
    path = getattr(options, 'path', None)
    if path:
        docid = decompose_bibdocfile_fullpath(path)["doc_id"]
        bd = BibDoc(docid)
        ids = []
        # NOTE(review): this branch collects *recids* although the function
        # is expected to return docids (it mirrors
        # cli_quick_match_all_recids); presumably it should return
        # intbitset([docid]) instead -- TODO confirm against callers.
        for rec_link in bd.bibrec_links:
            ids.append(rec_link["recid"])
        return intbitset(ids)
    deleted_docs = getattr(options, 'deleted_docs', None)
    action_undelete = getattr(options, 'action', None) == 'undelete'
    docids = getattr(options, 'docids', None)
    md_doc = getattr(options, 'md_doc', None)
    cd_doc = getattr(options, 'cd_doc', None)
    if docids is None:
        debug('Initially considering all the docids')
        if recids is None:
            recids = cli_quick_match_all_recids(options)
        docids = intbitset()
        for id_bibrec, id_bibdoc in run_sql('SELECT id_bibrec, id_bibdoc FROM bibrec_bibdoc'):
            if id_bibrec in recids:
                docids.add(id_bibdoc)
    else:
        debug('Initially considering this docids: %s' % docids)
    tmp_query = []
    tmp_params = []
    if deleted_docs is None and action_undelete:
        # undeleting implicitly restricts the candidates to deleted docs
        deleted_docs = 'only'
    if deleted_docs == 'no':
        tmp_query.append('status<>"DELETED"')
    elif deleted_docs == 'only':
        tmp_query.append('status="DELETED"')
    if md_doc[0] is not None:
        tmp_query.append('modification_date>=%s')
        tmp_params.append(md_doc[0])
    if md_doc[1] is not None:
        tmp_query.append('modification_date<=%s')
        tmp_params.append(md_doc[1])
    if cd_doc[0] is not None:
        tmp_query.append('creation_date>=%s')
        tmp_params.append(cd_doc[0])
    if cd_doc[1] is not None:
        tmp_query.append('creation_date<=%s')
        tmp_params.append(cd_doc[1])
    if tmp_query:
        tmp_query = ' AND '.join(tmp_query)
        tmp_params = tuple(tmp_params)
        query = 'SELECT id FROM bibdoc WHERE %s' % tmp_query
        debug('Query: %s, param: %s' % (query, tmp_params))
        docids &= intbitset(run_sql(query, tmp_params))
        debug('After applying dates we obtain docids: %s' % docids)
    return docids
def cli_slow_match_single_recid(options, recid, docids=None):
    """Apply all the given queries in order to assert whether a recid
    matches or not.

    If docids is given, the recid is matched only if it has at least one
    attached docid contained in that set.

    @rtype: bool
    """
    debug('cli_slow_match_single_recid checking: %s' % recid)
    deleted_docs = getattr(options, 'deleted_docs', None)
    deleted_recs = getattr(options, 'deleted_recs', None)
    empty_recs = getattr(options, 'empty_recs', None)
    docname = cli2docname(options)
    bibrecdocs = BibRecDocs(recid, deleted_too=(deleted_docs != 'no'))
    if bibrecdocs.deleted_p() and (deleted_recs == 'no'):
        return False
    elif not bibrecdocs.deleted_p() and (deleted_recs != 'only'):
        if docids:
            # for/else: the else runs only when no attached bibdoc was
            # found inside the docids set -> no match
            for bibdoc in bibrecdocs.list_bibdocs():
                if bibdoc.get_id() in docids:
                    break
            else:
                return False
        if docname:
            # for/else: no attached docname matched the glob pattern
            for other_docname in bibrecdocs.get_bibdoc_names():
                if docname and fnmatch.fnmatchcase(other_docname, docname):
                    break
            else:
                return False
        if bibrecdocs.empty_p() and (empty_recs != 'no'):
            return True
        elif not bibrecdocs.empty_p() and (empty_recs != 'only'):
            return True
    return False
def cli_slow_match_single_docid(options, docid, recids=None):
    """Apply all the given queries in order to assert whether a docid
    matches or not.

    @rtype: bool
    """
    debug('cli_slow_match_single_docid checking: %s' % docid)
    empty_docs = getattr(options, 'empty_docs', None)
    docname = cli2docname(options)
    if recids is None:
        # NOTE(review): recids is computed but only used by the
        # commented-out recid check below.
        recids = cli_quick_match_all_recids(options)
    bibdoc = BibDoc.create_instance(docid)
    dn = None
    if bibdoc.bibrec_links:
        # docname under which the document is attached to its first record
        dn = bibdoc.bibrec_links[0]["docname"]
    if docname and not fnmatch.fnmatchcase(dn, docname):
        debug('docname %s does not match the pattern %s' % (repr(dn), repr(docname)))
        return False
    # elif bibdoc.get_recid() and bibdoc.get_recid() not in recids:
    #     debug('recid %s is not in pattern %s' % (repr(bibdoc.get_recid()), repr(recids)))
    #     return False
    elif empty_docs == 'no' and bibdoc.empty_p():
        debug('bibdoc is empty')
        return False
    elif empty_docs == 'only' and not bibdoc.empty_p():
        debug('bibdoc is not empty')
        return False
    else:
        return True
def cli2recid(options, recids=None, docids=None):
    """Given the command line options return the single matching recid.

    @raise StandardError: when zero or more than one recid matches.
    """
    matched = list(cli_recids_iterator(options, recids=recids, docids=docids))
    if len(matched) == 1:
        return matched[0]
    if matched:
        raise StandardError("More than one recid has been matched: %s" % matched)
    raise StandardError("No recids matched")
def cli2docid(options, recids=None, docids=None):
    """Given the command line options return the single matching docid.

    @raise StandardError: when zero or more than one docid matches.
    """
    matched = list(cli_docids_iterator(options, recids=recids, docids=docids))
    if len(matched) == 1:
        return matched[0]
    if matched:
        raise StandardError("More than one docid has been matched: %s" % matched)
    raise StandardError("No docids matched")
def cli2flags(options):
    """
    Transform a comma separated list of flags into a list of valid,
    upper-cased flag names.

    @raise StandardError: when a flag is not among the known ones.
    """
    raw = getattr(options, 'flags', None)
    if not raw:
        return []
    flags = [chunk.strip().upper() for chunk in raw.split(',')]
    for flag in flags:
        if flag not in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
            raise StandardError("%s is not among the valid flags: %s" % (flag, ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS)))
    return flags
def cli2description(options):
    """Return the description to apply; KEEP_OLD_VALUE when the user did
    not supply one."""
    description = getattr(options, 'set_description', None)
    return KEEP_OLD_VALUE if description is None else description
def cli2restriction(options):
    """Return the restriction to apply; KEEP_OLD_VALUE when the user did
    not supply one."""
    restriction = getattr(options, 'set_restriction', None)
    return KEEP_OLD_VALUE if restriction is None else restriction
def cli2comment(options):
    """Return the comment to apply; KEEP_OLD_VALUE when the user did not
    supply one."""
    comment = getattr(options, 'set_comment', None)
    return KEEP_OLD_VALUE if comment is None else comment
def cli2doctype(options):
    """Return the doctype to apply, defaulting to 'Main' when absent or
    empty."""
    return getattr(options, 'set_doctype', None) or 'Main'
def cli2docname(options, url=None):
    """Return the docname from the command line options, falling back on
    the one derivable from *url*; None when neither is available."""
    docname = getattr(options, 'docname', None)
    if docname is not None:
        return docname
    if url is None:
        return None
    return get_docname_from_url(url)
def cli2format(options, url=None):
    """Return the docformat from the command line options, or guess it
    from *url*.

    @raise OptionValueError: when neither source is available."""
    docformat = getattr(options, 'format', None)
    if docformat is not None:
        return docformat
    if url is None:
        raise OptionValueError("Not enough information to retrieve a valid format")
    ## FIXME: to deploy once conversion-tools branch is merged
    return guess_format_from_url(url)
def cli_recids_iterator(options, recids=None, docids=None):
    """Slow iterator over all the matched recids.

    If docids is given, a recid is yielded only when the slow per-record
    match succeeds against those docids."""
    debug('cli_recids_iterator')
    if recids is None:
        recids = cli_quick_match_all_recids(options)
    debug('working on recids: %s, docids: %s' % (recids, docids))
    for recid in recids:
        if cli_slow_match_single_recid(options, recid, docids):
            yield recid
    # no explicit 'raise StopIteration': inside a generator it is
    # redundant and, from Python 3.7 on (PEP 479), would be converted
    # into a RuntimeError.
def cli_docids_iterator(options, recids=None, docids=None):
    """Slow iterator over all the matched docids."""
    if recids is None:
        recids = cli_quick_match_all_recids(options)
    if docids is None:
        docids = cli_quick_match_all_docids(options, recids)
    for docid in docids:
        if cli_slow_match_single_docid(options, docid, recids):
            yield docid
    # no explicit 'raise StopIteration': inside a generator it is
    # redundant and, from Python 3.7 on (PEP 479), would be converted
    # into a RuntimeError.
def cli_get_stats(dummy):
    """Print per every collection some stats (formats, mimetypes, total
    sizes of the latest versions of the attached files).

    @param dummy: unused (kept for the CLI dispatch signature).
    """
    def print_table(title, table):
        # helper: pretty-print a non-empty SQL result table with a banner
        if table:
            print "=" * 20, title, "=" * 20
            for row in table:
                print "\t".join(str(elem) for elem in row)
    for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"):
        print "-" * 79
        print "Statistic for: %s " % collection
        reclist = intbitset(reclist)
        if reclist:
            # the id list is interpolated directly because intbitset
            # contents are integers (hence the kwalitee waivers below)
            sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')'
            print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql
            print_table("Mimetypes", run_sql("SELECT COUNT(mime) as c, mime FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY mime ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql
            print_table("Sizes", run_sql("SELECT SUM(filesize) AS c FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true" % sqlreclist)) # kwalitee: disable=sql
class OptionParserSpecial(OptionParser):
    """OptionParser variant that can append free text after the generated help.

    If a ``trailing_text`` attribute has been set on the instance, it is
    printed (followed by a newline) after the regular option help.
    """
    def format_help(self, *args, **kwargs):
        base_help = OptionParser.format_help(self, *args, **kwargs)
        if not hasattr(self, 'trailing_text'):
            return base_help
        return "%s\n%s\n" % (base_help, self.trailing_text)
def prepare_option_parser():
    """Build and return the OptionParserSpecial for the bibdocfile CLI.

    Options are grouped into: query options (record/document selection),
    information getters, information setters, content revision actions,
    housekeeping actions and experimental actions.
    """
    def _ids_ranges_callback(option, opt, value, parser):
        """Callback for optparse to parse a set of ids ranges in the form
        nnn1-nnn2,mmm1-mmm2... returning the corresponding intbitset.
        """
        try:
            debug('option: %s, opt: %s, value: %s, parser: %s' % (option, opt, value, parser))
            debug('Parsing range: %s' % value)
            value = ranges2ids(value)
            setattr(parser.values, option.dest, value)
        except Exception, e:
            raise OptionValueError("It's impossible to parse the range '%s' for option %s: %s" % (value, opt, e))
    def _date_range_callback(option, opt, value, parser):
        """Callback for optparse to parse a range of dates in the form
        [date1],[date2]. Both date1 and date2 could be optional.
        the date can be expressed absolutely ("%Y-%m-%d %H:%M:%S")
        or relatively (([-\+]{0,1})([\d]+)([dhms])) to the current time."""
        try:
            value = _parse_date_range(value)
            setattr(parser.values, option.dest, value)
        except Exception, e:
            raise OptionValueError("It's impossible to parse the range '%s' for option %s: %s" % (value, opt, e))
    parser = OptionParserSpecial(usage="usage: %prog [options]",
                    #epilog="""With <query> you select the range of record/docnames/single files to work on. Note that some actions e.g. delete, append, revise etc. works at the docname level, while others like --set-comment, --set-description, at single file level and other can be applied in an iterative way to many records in a single run. Note that specifing docid(2) takes precedence over recid(2) which in turns takes precedence over pattern/collection search.""",
                    version=__revision__)
    # Free-form usage examples appended after the generated help (see
    # OptionParserSpecial.format_help).
    parser.trailing_text = """
Examples:
    $ bibdocfile --append foo.tar.gz --recid=1
    $ bibdocfile --revise http://foo.com?search=123 --with-docname='sam'
            --format=pdf --recid=3 --set-docname='pippo' # revise for record 3
                    # the document sam, renaming it to pippo.
    $ bibdocfile --delete --with-docname="*sam" --all # delete all documents
                                                      # starting ending
                                                      # with "sam"
    $ bibdocfile --undelete -c "Test Collection" # undelete documents for
                                                 # the collection
    $ bibdocfile --get-info --recids=1-4,6-8 # obtain informations
    $ bibdocfile -r 1 --with-docname=foo --set-docname=bar # Rename a document
    $ bibdocfile -r 1 --set-restriction "firerole: deny until '2011-01-01'
    allow any" # set an embargo to all the documents attached to record 1
        # (note the ^M or \\n before 'allow any')
        # See also $r subfield in <%(site)s/help/admin/bibupload-admin-guide#3.6>
        # and Firerole in <%(site)s/help/admin/webaccess-admin-guide#6>
    $ bibdocfile --append x.pdf --recid=1 --with-flags='PDF/A,OCRED' # append
        # to record 1 the file x.pdf specifying the PDF/A and OCRED flags
""" % {'site': CFG_SITE_URL}
    # --- Query options: select which records/documents to operate on ---
    query_options = OptionGroup(parser, 'Query options')
    query_options.add_option('-r', '--recids', action="callback", callback=_ids_ranges_callback, type='string', dest='recids', help='matches records by recids, e.g.: --recids=1-3,5-7')
    query_options.add_option('-d', '--docids', action="callback", callback=_ids_ranges_callback, type='string', dest='docids', help='matches documents by docids, e.g.: --docids=1-3,5-7')
    query_options.add_option('-a', '--all', action='store_true', dest='all', help='Select all the records')
    query_options.add_option("--with-deleted-recs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_recs", help="'Yes' to also match deleted records, 'no' to exclude them, 'only' to match only deleted ones", metavar="yes/no/only", default='no')
    query_options.add_option("--with-deleted-docs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_docs", help="'Yes' to also match deleted documents, 'no' to exclude them, 'only' to match only deleted ones (e.g. for undeletion)", metavar="yes/no/only", default='no')
    query_options.add_option("--with-empty-recs", choices=['yes', 'no', 'only'], type="choice", dest="empty_recs", help="'Yes' to also match records without attached documents, 'no' to exclude them, 'only' to consider only such records (e.g. for statistics)", metavar="yes/no/only", default='no')
    query_options.add_option("--with-empty-docs", choices=['yes', 'no', 'only'], type="choice", dest="empty_docs", help="'Yes' to also match documents without attached files, 'no' to exclude them, 'only' to consider only such documents (e.g. for sanity checking)", metavar="yes/no/only", default='no')
    query_options.add_option("--with-record-modification-date", action="callback", callback=_date_range_callback, dest="md_rec", nargs=1, type="string", default=(None, None), help="matches records modified date1 and date2; dates can be expressed relatively, e.g.:\"-5m,2030-2-23 04:40\" # matches records modified since 5 minutes ago until the 2030...", metavar="date1,date2")
    query_options.add_option("--with-record-creation-date", action="callback", callback=_date_range_callback, dest="cd_rec", nargs=1, type="string", default=(None, None), help="matches records created between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
    query_options.add_option("--with-document-modification-date", action="callback", callback=_date_range_callback, dest="md_doc", nargs=1, type="string", default=(None, None), help="matches documents modified between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
    query_options.add_option("--with-document-creation-date", action="callback", callback=_date_range_callback, dest="cd_doc", nargs=1, type="string", default=(None, None), help="matches documents created between date1 and date2; dates can be expressed relatively", metavar="date1,date2")
    query_options.add_option("--url", dest="url", help='matches the document referred by the URL, e.g. "%s/%s/1/files/foobar.pdf?version=2"' % (CFG_SITE_URL, CFG_SITE_RECORD))
    query_options.add_option("--path", dest="path", help='matches the document referred by the internal filesystem path, e.g. %s/g0/1/foobar.pdf\\;1' % CFG_BIBDOCFILE_FILEDIR)
    query_options.add_option("--with-docname", dest="docname", help='matches documents with the given docname (accept wildcards)')
    query_options.add_option("--with-doctype", dest="doctype", help='matches documents with the given doctype')
    query_options.add_option('-p', '--pattern', dest='pattern', help='matches records by pattern')
    query_options.add_option('-c', '--collection', dest='collection', help='matches records by collection')
    query_options.add_option('--force', dest='force', help='force an action even when it\'s not necessary e.g. textify on an already textified bibdoc.', action='store_true', default=False)
    parser.add_option_group(query_options)
    # --- Read-only actions ---
    getting_information_options = OptionGroup(parser, 'Actions for getting information')
    getting_information_options.add_option('--get-info', dest='action', action='store_const', const='get-info', help='print all the informations about the matched record/documents')
    getting_information_options.add_option('--get-disk-usage', dest='action', action='store_const', const='get-disk-usage', help='print disk usage statistics of the matched documents')
    getting_information_options.add_option('--get-history', dest='action', action='store_const', const='get-history', help='print the matched documents history')
    getting_information_options.add_option('--get-stats', dest='action', action='store_const', const='get-stats', help='print some statistics of file properties grouped by collections')
    parser.add_option_group(getting_information_options)
    # --- Metadata-setting actions ---
    setting_information_options = OptionGroup(parser, 'Actions for setting information')
    setting_information_options.add_option('--set-doctype', dest='set_doctype', help='specify the new doctype', metavar='doctype')
    setting_information_options.add_option('--set-description', dest='set_description', help='specify a description', metavar='description')
    setting_information_options.add_option('--set-comment', dest='set_comment', help='specify a comment', metavar='comment')
    setting_information_options.add_option('--set-restriction', dest='set_restriction', help='specify a restriction tag', metavar='restriction')
    setting_information_options.add_option('--set-docname', dest='new_docname', help='specifies a new docname for renaming', metavar='docname')
    # The --unset-* variants store the empty string so that downstream code
    # can distinguish "remove the value" ('') from "keep the old one" (None).
    setting_information_options.add_option("--unset-comment", action="store_const", const='', dest="set_comment", help="remove any comment")
    setting_information_options.add_option("--unset-descriptions", action="store_const", const='', dest="set_description", help="remove any description")
    setting_information_options.add_option("--unset-restrictions", action="store_const", const='', dest="set_restriction", help="remove any restriction")
    setting_information_options.add_option("--hide", dest="action", action='store_const', const='hide', help="hides matched documents and revisions")
    setting_information_options.add_option("--unhide", dest="action", action='store_const', const='unhide', help="hides matched documents and revisions")
    parser.add_option_group(setting_information_options)
    # --- Content-revision actions ---
    revising_options = OptionGroup(parser, 'Action for revising content')
    revising_options.add_option("--append", dest='append_path', help='specify the URL/path of the file that will appended to the bibdoc (implies --with-empty-recs=yes)', metavar='PATH/URL')
    revising_options.add_option("--revise", dest='revise_path', help='specify the URL/path of the file that will revise the bibdoc', metavar='PATH/URL')
    revising_options.add_option("--revert", dest='action', action='store_const', const='revert', help='reverts a document to the specified version')
    revising_options.add_option("--delete", action='store_const', const='delete', dest='action', help='soft-delete the matched documents')
    revising_options.add_option("--hard-delete", action='store_const', const='hard-delete', dest='action', help='hard-delete the single matched document with a specific format and a specific revision (this operation is not revertible)')
    revising_options.add_option("--undelete", action='store_const', const='undelete', dest='action', help='undelete previosuly soft-deleted documents')
    revising_options.add_option("--purge", action='store_const', const='purge', dest='action', help='purge (i.e. hard-delete any format of any version prior to the latest version of) the matched documents')
    revising_options.add_option("--expunge", action='store_const', const='expunge', dest='action', help='expunge (i.e. hard-delete any version and formats of) the matched documents')
    revising_options.add_option("--with-version", dest="version", help="specifies the version(s) to be used with hide, unhide, e.g.: 1-2,3 or ALL. Specifies the version to be used with hard-delete and revert, e.g. 2")
    revising_options.add_option("--with-format", dest="format", help='to specify a format when appending/revising/deleting/reverting a document, e.g. "pdf"', metavar='FORMAT')
    revising_options.add_option("--with-hide-previous", dest='hide_previous', action='store_true', help='when revising, hides previous versions', default=False)
    revising_options.add_option("--with-flags", dest='flags', help='comma-separated optional list of flags used when appending/revising a document. Valid flags are: %s' % ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS), default=None)
    parser.add_option_group(revising_options)
    # --- Housekeeping actions ---
    housekeeping_options = OptionGroup(parser, 'Actions for housekeeping')
    housekeeping_options.add_option("--check-md5", action='store_const', const='check-md5', dest='action', help='check md5 checksum validity of files')
    housekeeping_options.add_option("--check-format", action='store_const', const='check-format', dest='action', help='check if any format-related inconsistences exists')
    housekeeping_options.add_option("--check-duplicate-docnames", action='store_const', const='check-duplicate-docnames', dest='action', help='check for duplicate docnames associated with the same record')
    housekeeping_options.add_option("--update-md5", action='store_const', const='update-md5', dest='action', help='update md5 checksum of files')
    housekeeping_options.add_option("--fix-all", action='store_const', const='fix-all', dest='action', help='fix inconsistences in filesystem vs database vs MARC')
    housekeeping_options.add_option("--fix-marc", action='store_const', const='fix-marc', dest='action', help='synchronize MARC after filesystem/database')
    housekeeping_options.add_option("--fix-format", action='store_const', const='fix-format', dest='action', help='fix format related inconsistences')
    housekeeping_options.add_option("--fix-duplicate-docnames", action='store_const', const='fix-duplicate-docnames', dest='action', help='fix duplicate docnames associated with the same record')
    housekeeping_options.add_option("--fix-bibdocfsinfo-cache", action='store_const', const='fix-bibdocfsinfo-cache', dest='action', help='fix bibdocfsinfo cache related inconsistences')
    parser.add_option_group(housekeeping_options)
    # --- Experimental actions ---
    experimental_options = OptionGroup(parser, 'Experimental options (do not expect to find them in the next release)')
    experimental_options.add_option('--textify', dest='action', action='store_const', const='textify', help='extract text from matched documents and store it for later indexing')
    experimental_options.add_option('--with-ocr', dest='perform_ocr', action='store_true', default=False, help='when used with --textify, wether to perform OCR')
    parser.add_option_group(experimental_options)
    parser.add_option('-D', '--debug', action='store_true', dest='debug', default=False)
    parser.add_option('-H', '--human-readable', dest='human_readable', action='store_true', default=False, help='print sizes in human readable format (e.g., 1KB 234MB 2GB)')
    # NOTE(review): dest contains hyphens, so the value is only reachable
    # via getattr(options, 'yes-i-know') — confirm callers use getattr.
    parser.add_option('--yes-i-know', action='store_true', dest='yes-i-know', help='use with care!')
    return parser
def print_info(docid, info):
    """Nicely print info about a docid."""
    # Output format is "<docid>:<info>"; used by the cli_get_* commands.
    print '%i:%s' % (docid, info)
def bibupload_ffts(ffts, append=False, do_debug=False, interactive=True):
    """Given an ffts dictionary it creates the xml and submit it.

    @param ffts: dictionary mapping recid -> list of FFT dictionaries.
    @param append: if True submit the task with bibupload -a (append),
        otherwise with -c (correct).
    @param do_debug: if True submit the bibupload task with -v9 verbosity.
    @param interactive: if True print the MARCXML, ask the user for
        confirmation and report the submitted task id.
    @return: True (also when there was nothing to upload).
    """
    xml = ffts_to_xml(ffts)
    if xml:
        if interactive:
            print xml
        # The MARCXML is written to a shared temporary file that the
        # bibupload daemon will later read; it must be world-readable.
        tmp_file_fd, tmp_file_name = mkstemp(suffix='.xml', prefix="bibdocfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"), dir=CFG_TMPSHAREDDIR)
        os.write(tmp_file_fd, xml)
        os.close(tmp_file_fd)
        os.chmod(tmp_file_name, 0644)
        if append:
            if interactive:
                wait_for_user("This will be appended via BibUpload")
            if do_debug:
                task = task_low_level_submission('bibupload', 'bibdocfile', '-a', tmp_file_name, '-N', 'FFT', '-S2', '-v9')
            else:
                task = task_low_level_submission('bibupload', 'bibdocfile', '-a', tmp_file_name, '-N', 'FFT', '-S2')
            if interactive:
                print "BibUpload append submitted with id %s" % task
        else:
            if interactive:
                wait_for_user("This will be corrected via BibUpload")
            if do_debug:
                task = task_low_level_submission('bibupload', 'bibdocfile', '-c', tmp_file_name, '-N', 'FFT', '-S2', '-v9')
            else:
                task = task_low_level_submission('bibupload', 'bibdocfile', '-c', tmp_file_name, '-N', 'FFT', '-S2')
            if interactive:
                print "BibUpload correct submitted with id %s" % task
    elif interactive:
        print >> sys.stderr, "WARNING: no MARC to upload."
    return True
def ranges2ids(parse_string):
    """Parse a string and return the intbitset of the corresponding ids.

    The input is a comma-separated list of single ids or "low-high"
    ranges, e.g. "1-3,5,7-9"; reversed bounds are silently swapped.
    """
    ids = intbitset()
    for chunk in parse_string.split(","):
        bounds = chunk.split("-")
        if len(bounds) == 1:
            ids.add(int(bounds[0]))
        else:
            low, high = int(bounds[0]), int(bounds[1])
            if low > high:  # sanity check: accept reversed ranges too
                low, high = high, low
            ids += xrange(low, high + 1)
    return ids
def cli_append(options, append_path):
    """Create a bibupload FFT task submission for appending a format.

    @param options: parsed command line options.
    @param append_path: URL or path of the file to append.
    @return: the result of bibupload_ffts (True).
    @raise OptionValueError: if no docname can be derived.
    """
    recid = cli2recid(options)
    comment = cli2comment(options)
    description = cli2description(options)
    restriction = cli2restriction(options)
    doctype = cli2doctype(options)
    docname = cli2docname(options, url=append_path)
    flags = cli2flags(options)
    if not docname:
        raise OptionValueError, 'Not enough information to retrieve a valid docname'
    docformat = cli2format(options, append_path)
    url = clean_url(append_path)
    check_valid_url(url)
    bibrecdocs = BibRecDocs(recid)
    # If the same docname/format pair already exists, append under a fresh
    # unique docname instead of clashing (after warning the user).
    if bibrecdocs.has_docname_p(docname) and bibrecdocs.get_bibdoc(docname).format_already_exists_p(docformat):
        new_docname = bibrecdocs.propose_unique_docname(docname)
        wait_for_user("WARNING: a document with name %s and format %s already exists for recid %s. A new document with name %s will be created instead." % (repr(docname), repr(docformat), repr(recid), repr(new_docname)))
        docname = new_docname
    ffts = {recid: [{
        'docname' : docname,
        'comment' : comment,
        'description' : description,
        'restriction' : restriction,
        'doctype' : doctype,
        'format' : docformat,
        'url' : url,
        'options': flags
    }]}
    return bibupload_ffts(ffts, append=True)
def cli_revise(options, revise_path):
    """Create a bibupload FFT task submission for revising a format.

    @param options: parsed command line options.
    @param revise_path: URL or path of the file that revises the bibdoc.
    @return: the result of bibupload_ffts (True).
    @raise OptionValueError: if no docname can be derived.
    """
    recid = cli2recid(options)
    comment = cli2comment(options)
    description = cli2description(options)
    restriction = cli2restriction(options)
    docname = cli2docname(options, url=revise_path)
    hide_previous = getattr(options, 'hide_previous', None)
    flags = cli2flags(options)
    # --with-hide-previous is transported to bibupload as an FFT flag.
    if hide_previous and 'PERFORM_HIDE_PREVIOUS' not in flags:
        flags.append('PERFORM_HIDE_PREVIOUS')
    if not docname:
        raise OptionValueError, 'Not enough information to retrieve a valid docname'
    docformat = cli2format(options, revise_path)
    doctype = cli2doctype(options)
    url = clean_url(revise_path)
    new_docname = getattr(options, 'new_docname', None)
    check_valid_url(url)
    ffts = {recid : [{
        'docname' : docname,
        'new_docname' : new_docname,
        'comment' : comment,
        'description' : description,
        'restriction' : restriction,
        'doctype' : doctype,
        'format' : docformat,
        'url' : url,
        'options' : flags
    }]}
    return bibupload_ffts(ffts)
def cli_set_batch(options):
    """Change in batch the doctype, description, comment and restriction.

    Description and comment are per-file properties, so when either is
    requested one FFT entry is emitted for every latest file (optionally
    limited to --with-format); otherwise a single document-level FFT entry
    is emitted.
    """
    ffts = {}
    doctype = getattr(options, 'set_doctype', None)
    description = cli2description(options)
    comment = cli2comment(options)
    restriction = cli2restriction(options)
    with_format = getattr(options, 'format', None)
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        recid = None
        docname = None
        if bibdoc.bibrec_links:
            # pick a sample recid from those to which a BibDoc is attached
            recid = bibdoc.bibrec_links[0]["recid"]
            docname = bibdoc.bibrec_links[0]["docname"]
        fft = []
        if description is not None or comment is not None:
            for bibdocfile in bibdoc.list_latest_files():
                docformat = bibdocfile.get_format()
                # BUGFIX: this used to compare with_format against the
                # *builtin* ``format`` instead of the file's own format,
                # so --with-format never restricted the update here.
                if not with_format or with_format == docformat:
                    fft.append({
                        'docname': docname,
                        'restriction': restriction,
                        'comment': comment,
                        'description': description,
                        'format': docformat,
                        'doctype': doctype
                    })
        else:
            fft.append({
                'docname': docname,
                'restriction': restriction,
                'doctype': doctype,
            })
        ffts[recid] = fft
    return bibupload_ffts(ffts, append=False)
def cli_textify(options):
    """Extract text to let indexing on fulltext be possible."""
    force = getattr(options, 'force', None)
    perform_ocr = getattr(options, 'perform_ocr', None)
    # OCR is only attempted when the needed tools are available; otherwise
    # fall back to plain text extraction with a warning.
    if perform_ocr:
        if not can_perform_ocr():
            print >> sys.stderr, "WARNING: OCR requested but OCR is not possible"
            perform_ocr = False
    if perform_ocr:
        additional = ' using OCR (this might take some time)'
    else:
        additional = ''
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        # Trailing comma: keep the status on the same line as "DONE"/"not needed".
        print 'Extracting text for docid %s%s...' % (docid, additional),
        sys.stdout.flush()
        #pylint: disable=E1103
        # Skip documents whose extracted text is already up to date, unless
        # --force was given.
        if force or (hasattr(bibdoc, "has_text") and not bibdoc.has_text(require_up_to_date=True)):
            try:
                #pylint: disable=E1103
                bibdoc.extract_text(perform_ocr=perform_ocr)
                print "DONE"
            except InvenioBibDocFileError, e:
                print >> sys.stderr, "WARNING: %s" % e
        else:
            print "not needed"
def cli_rename(options):
    """Rename a docname within a recid.

    The current docname is taken from the first record link of the matched
    bibdoc; the new one from --set-docname.
    """
    new_docname = getattr(options, 'new_docname', None)
    docid = cli2docid(options)
    bibdoc = BibDoc.create_instance(docid)
    docname = None
    if bibdoc.bibrec_links:
        docname = bibdoc.bibrec_links[0]["docname"]
    recid = cli2recid(options) # now we read the recid from options
    ffts = {recid : [{'docname' : docname, 'new_docname' : new_docname}]}
    return bibupload_ffts(ffts, append=False)
def cli_fix_bibdocfsinfo_cache(options):
    """Rebuild the bibdocfsinfo table according to what is available on filesystem"""
    to_be_fixed = intbitset()
    for docid in intbitset(run_sql("SELECT id FROM bibdoc")):
        # Trailing comma: keep the outcome on the same line as the message.
        print "Fixing bibdocfsinfo table for docid %s..." % docid,
        sys.stdout.flush()
        try:
            bibdoc = BibDoc(docid)
        except InvenioBibDocFileError, err:
            print err
            continue
        try:
            bibdoc._sync_to_db()
        except Exception, err:
            # When the sync fails, remember the (first) attached recid so a
            # format fix can be scheduled for it below.
            if bibdoc.bibrec_links:
                recid = bibdoc.bibrec_links[0]["recid"]
                if recid:
                    to_be_fixed.add(recid)
                print "ERROR: %s, scheduling a fix for recid %s" % (err, recid)
            else:
                print "ERROR %s" % (err, )
        print "DONE"
    if to_be_fixed:
        cli_fix_format(options, recids=to_be_fixed)
    print "You can now add CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE=1 to your invenio-local.conf file."
def cli_fix_all(options):
    """Fix all the records of a recid_set.

    Emits one FIX-ALL FFT entry per docname of every matched record and
    submits them via bibupload.
    """
    ffts = {}
    for recid in cli_recids_iterator(options):
        ffts[recid] = [{'docname' : name, 'doctype' : 'FIX-ALL'}
                       for name in BibRecDocs(recid).get_bibdoc_names()]
    return bibupload_ffts(ffts, append=False)
def cli_fix_marc(options, explicit_recid_set=None, interactive=True):
    """Fix all the records of a recid_set.

    When explicit_recid_set is given it is used directly; otherwise the
    recids are matched from the command line options.
    """
    if explicit_recid_set is not None:
        recid_source = explicit_recid_set
    else:
        recid_source = cli_recids_iterator(options)
    ffts = dict((recid, [{'doctype' : 'FIX-MARC'}]) for recid in recid_source)
    return bibupload_ffts(ffts, append=False, interactive=interactive)
def cli_check_format(options):
"""Check if any format-related inconsistences exists."""
count = 0
tot = 0
duplicate = False
for recid in cli_recids_iterator(options):
tot += 1
bibrecdocs = BibRecDocs(recid)
if not bibrecdocs.check_duplicate_docnames():
print >> sys.stderr, "recid %s has duplicate docnames!"
broken = True
duplicate = True
else:
broken = False
for docname in bibrecdocs.get_bibdoc_names():
if not bibrecdocs.check_format(docname):
print >> sys.stderr, "recid %s with docname %s need format fixing" % (recid, docname)
broken = True
if broken:
count += 1
if count:
result = "%d out of %d records need their formats to be fixed." % (count, tot)
else:
result = "All records appear to be correct with respect to formats."
if duplicate:
result += " Note however that at least one record appear to have duplicate docnames. You should better fix this situation by using --fix-duplicate-docnames."
print wrap_text_in_a_box(result, style="conclusion")
return not(duplicate or count)
def cli_check_duplicate_docnames(options):
"""Check if some record is connected with bibdoc having the same docnames."""
count = 0
tot = 0
for recid in cli_recids_iterator(options):
tot += 1
bibrecdocs = BibRecDocs(recid)
if bibrecdocs.check_duplicate_docnames():
count += 1
print >> sys.stderr, "recid %s has duplicate docnames!"
if count:
print "%d out of %d records have duplicate docnames." % (count, tot)
return False
else:
print "All records appear to be correct with respect to duplicate docnames."
return True
def cli_fix_format(options, recids=None):
"""Fix format-related inconsistences."""
fixed = intbitset()
tot = 0
if not recids:
recids = cli_recids_iterator(options)
for recid in recids:
tot += 1
bibrecdocs = BibRecDocs(recid)
for docname in bibrecdocs.get_bibdoc_names():
if not bibrecdocs.check_format(docname):
if bibrecdocs.fix_format(docname, skip_check=True):
print >> sys.stderr, "%i has been fixed for docname %s" % (recid, docname)
else:
print >> sys.stderr, "%i has been fixed for docname %s. However note that a new bibdoc might have been created." % (recid, docname)
fixed.add(recid)
if fixed:
print "Now we need to synchronize MARC to reflect current changes."
cli_fix_marc(options, explicit_recid_set=fixed)
print wrap_text_in_a_box("%i out of %i record needed to be fixed." % (tot, len(fixed)), style="conclusion")
return not fixed
def cli_fix_duplicate_docnames(options):
"""Fix duplicate docnames."""
fixed = intbitset()
tot = 0
for recid in cli_recids_iterator(options):
tot += 1
bibrecdocs = BibRecDocs(recid)
if not bibrecdocs.check_duplicate_docnames():
bibrecdocs.fix_duplicate_docnames(skip_check=True)
print >> sys.stderr, "%i has been fixed for duplicate docnames." % recid
fixed.add(recid)
if fixed:
print "Now we need to synchronize MARC to reflect current changes."
cli_fix_marc(options, explicit_recid_set=fixed)
print wrap_text_in_a_box("%i out of %i record needed to be fixed." % (tot, len(fixed)), style="conclusion")
return not fixed
def cli_delete(options):
    """Delete the given docid_set (soft-delete via a DELETE FFT)."""
    ffts = {}
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        docname = None
        recid = None
        # retrieve the 1st recid
        # BUGFIX: this used to check ``recid.bibrec_links`` -- but recid is
        # None at this point, so the check always raised AttributeError;
        # the record links live on the bibdoc.
        if bibdoc.bibrec_links:
            recid = bibdoc.bibrec_links[0]["recid"]
            docname = bibdoc.bibrec_links[0]["docname"]
        if recid not in ffts:
            ffts[recid] = [{'docname' : docname, 'doctype' : 'DELETE'}]
        else:
            ffts[recid].append({'docname' : docname, 'doctype' : 'DELETE'})
    return bibupload_ffts(ffts)
def cli_delete_file(options):
    """Delete the given file irreversibely.

    @raise OptionValueError: when --with-version is missing or not a
        positive integer.
    """
    docid = cli2docid(options)
    recid = cli2recid(options, docids=intbitset([docid]))
    docformat = cli2format(options)
    bdr = BibRecDocs(recid)
    docname = bdr.get_docname(docid)
    version = getattr(options, 'version', None)
    try:
        version_int = int(version)
        if 0 >= version_int:
            raise ValueError
    # BUGFIX: was a bare "except:", which also swallowed KeyboardInterrupt
    # and SystemExit; int() only raises TypeError (None) or ValueError.
    except (TypeError, ValueError):
        raise OptionValueError('when hard-deleting, version should be valid positive integer, not %s' % version)
    ffts = {recid : [{'docname' : docname, 'version' : version, 'format' : docformat, 'doctype' : 'DELETE-FILE'}]}
    return bibupload_ffts(ffts)
def cli_revert(options):
    """Revert a bibdoc to a given version.

    @raise OptionValueError: when --with-version is missing or not a
        positive integer.
    """
    docid = cli2docid(options)
    recid = cli2recid(options, docids=intbitset([docid]))
    bdr = BibRecDocs(recid)
    docname = bdr.get_docname(docid)
    version = getattr(options, 'version', None)
    try:
        version_int = int(version)
        if 0 >= version_int:
            raise ValueError
    # BUGFIX: was a bare "except:", which also swallowed KeyboardInterrupt
    # and SystemExit; int() only raises TypeError (None) or ValueError.
    except (TypeError, ValueError):
        raise OptionValueError('when reverting, version should be valid positive integer, not %s' % version)
    ffts = {recid : [{'docname' : docname, 'version' : version, 'doctype' : 'REVERT'}]}
    return bibupload_ffts(ffts)
def cli_undelete(options):
    """Undelete the matched soft-deleted documents.

    Deleted documents are stored under a 'DELETED-<timestamp>-<name>'
    docname; the given --with-docname pattern is therefore rewritten to
    match that scheme before filtering.
    """
    docname = cli2docname(options)
    restriction = getattr(options, 'restriction', None)
    count = 0
    if not docname:
        docname = 'DELETED-*-*'
    if not docname.startswith('DELETED-'):
        docname = 'DELETED-*-' + docname
    to_be_undeleted = intbitset()
    fix_marc = intbitset()
    # Force the docid iterator to consider only deleted documents.
    setattr(options, 'deleted_docs', 'only')
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        dnold = None
        if bibdoc.bibrec_links:
            dnold = bibdoc.bibrec_links[0]["docname"]
        # NOTE(review): if the bibdoc has no record links, dnold stays None
        # and fnmatch.fnmatch(None, ...) would raise -- confirm every
        # deleted bibdoc is expected to keep at least one record link.
        if bibdoc.get_status() == 'DELETED' and fnmatch.fnmatch(dnold, docname):
            to_be_undeleted.add(docid)
            # get the 1st recid to which the document is attached
            recid = None
            if bibdoc.bibrec_links:
                recid = bibdoc.bibrec_links[0]["recid"]
            fix_marc.add(recid)
            count += 1
            print '%s (docid %s from recid %s) will be undeleted to restriction: %s' % (dnold, docid, recid, restriction)
    wait_for_user("I'll proceed with the undeletion")
    for docid in to_be_undeleted:
        bibdoc = BibDoc.create_instance(docid)
        bibdoc.undelete(restriction)
    cli_fix_marc(options, explicit_recid_set=fix_marc)
    print wrap_text_in_a_box("%s bibdoc successfuly undeleted with status '%s'" % (count, restriction), style="conclusion")
def cli_get_info(options):
    """Print all the info of the matched docids or recids.

    When --docids was given the info is printed per BibDoc, otherwise per
    BibRecDocs (i.e. per record).
    """
    debug('Getting info!')
    human_readable = bool(getattr(options, 'human_readable', None))
    debug('human_readable: %s' % human_readable)
    deleted_docs = getattr(options, 'deleted_docs', None) in ('yes', 'only')
    debug('deleted_docs: %s' % deleted_docs)
    if getattr(options, 'docids', None):
        for docid in cli_docids_iterator(options):
            sys.stdout.write(str(BibDoc.create_instance(docid, human_readable=human_readable)))
    else:
        for recid in cli_recids_iterator(options):
            sys.stdout.write(str(BibRecDocs(recid, deleted_too=deleted_docs, human_readable=human_readable)))
def cli_purge(options):
    """Purge the matched docids.

    Emits one PURGE FFT entry per matched document (using the first record
    it is attached to) and submits them via bibupload.
    """
    ffts = {}
    for docid in cli_docids_iterator(options):
        links = BibDoc.create_instance(docid).bibrec_links
        if not links:
            continue
        recid = links[0]["recid"]
        docname = links[0]["docname"]
        if recid:
            ffts.setdefault(recid, []).append({
                'docname' : docname,
                'doctype' : 'PURGE',
            })
    return bibupload_ffts(ffts)
def cli_expunge(options):
    """Expunge the matched docids.

    Emits one EXPUNGE FFT entry per matched document (using the first
    record it is attached to) and submits them via bibupload.
    """
    ffts = {}
    for docid in cli_docids_iterator(options):
        #TODO: If we have a syntax for manipulating completely standalone
        # objects, this has to be modified
        links = BibDoc.create_instance(docid).bibrec_links
        if not links:
            continue
        recid = links[0]["recid"]
        docname = links[0]["docname"]
        if recid:
            ffts.setdefault(recid, []).append({
                'docname' : docname,
                'doctype' : 'EXPUNGE',
            })
    return bibupload_ffts(ffts)
def cli_get_history(options):
    """Print the history of a docid_set, one print_info line per entry."""
    for docid in cli_docids_iterator(options):
        for entry in BibDoc.create_instance(docid).get_history():
            print_info(docid, entry)
def cli_get_disk_usage(options):
    """Print the space usage of a docid_set.

    Per document, both the total size (all versions) and the size of the
    latest version are printed, then grand totals in a conclusion box.
    """
    human_readable = getattr(options, 'human_readable', None)
    total_size = 0
    total_latest_size = 0
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        size = bibdoc.get_total_size()
        total_size += size
        latest_size = bibdoc.get_total_size_latest_version()
        total_latest_size += latest_size
        if human_readable:
            print_info(docid, 'size=%s' % nice_size(size))
            print_info(docid, 'latest version size=%s' % nice_size(latest_size))
        else:
            print_info(docid, 'size=%s' % size)
            print_info(docid, 'latest version size=%s' % latest_size)
    if human_readable:
        print wrap_text_in_a_box('total size: %s\n\nlatest version total size: %s'
                                 % (nice_size(total_size), nice_size(total_latest_size)),
                                 style='conclusion')
    else:
        print wrap_text_in_a_box('total size: %s\n\nlatest version total size: %s'
                                 % (total_size, total_latest_size),
                                 style='conclusion')
def cli_check_md5(options):
    """Check the md5 sums of a docid_set.

    A per-file failure message is printed for every file whose checksum
    does not match, and a conclusion box summarizes the failure count.
    """
    failures = 0
    for docid in cli_docids_iterator(options):
        bibdoc = BibDoc.create_instance(docid)
        if bibdoc.md5s.check():
            print_info(docid, 'checksum OK')
        else:
            # Whole-document check failed: identify the offending file(s).
            for afile in bibdoc.list_all_files():
                if not afile.check():
                    failures += 1
                    print_info(docid, '%s failing checksum!' % afile.get_full_path())
    if failures:
        print wrap_text_in_a_box('%i files failing' % failures , style='conclusion')
    else:
        print wrap_text_in_a_box('All files are correct', style='conclusion')
def cli_update_md5(options):
    """Update the md5 sums of a docid_set."""
    for docid in cli_docids_iterator(options):
        doc = BibDoc.create_instance(docid)
        # Guard clause: nothing to do when all checksums already match.
        if doc.md5s.check():
            print_info(docid, 'checksum OK')
            continue
        for stored_file in doc.list_all_files():
            if not stored_file.check():
                print_info(docid, '%s failing checksum!' % stored_file.get_full_path())
        # Confirm interactively: rewriting md5s can mask real corruption.
        wait_for_user('Updating the md5s of this document can hide real problems.')
        doc.md5s.update(only_new=False)
        doc._sync_to_db()
def cli_hide(options):
"""Hide the matched versions of documents."""
documents_to_be_hidden = {}
to_be_fixed = intbitset()
versions = getattr(options, 'version', 'all')
if versions != 'all':
try:
versions = ranges2ids(versions)
except:
raise OptionValueError, 'You should specify correct versions. Not %s' % versions
else:
versions = intbitset(trailing_bits=True)
for docid in cli_docids_iterator(options):
bibdoc = BibDoc.create_instance(docid)
recid = None
if bibdoc.bibrec_links:
recid = bibdoc.bibrec_links[0]["recid"]
if recid:
for bibdocfile in bibdoc.list_all_files():
this_version = bibdocfile.get_version()
this_format = bibdocfile.get_format()
if this_version in versions:
if docid not in documents_to_be_hidden:
documents_to_be_hidden[docid] = []
documents_to_be_hidden[docid].append((this_version, this_format))
to_be_fixed.add(recid)
print '%s (docid: %s, recid: %s) will be hidden' % (bibdocfile.get_full_name(), docid, recid)
wait_for_user('Proceeding to hide the matched documents...')
for docid, documents in documents_to_be_hidden.iteritems():
bibdoc = BibDoc.create_instance(docid)
for version, docformat in documents:
bibdoc.set_flag('HIDDEN', docformat, version)
return cli_fix_marc(options, to_be_fixed)
def cli_unhide(options):
"""Unhide the matched versions of documents."""
documents_to_be_unhidden = {}
to_be_fixed = intbitset()
versions = getattr(options, 'version', 'all')
if versions != 'all':
try:
versions = ranges2ids(versions)
except:
raise OptionValueError, 'You should specify correct versions. Not %s' % versions
else:
versions = intbitset(trailing_bits=True)
for docid in cli_docids_iterator(options):
bibdoc = BibDoc.create_instance(docid)
recid = None
if bibdoc.bibrec_links:
recid = bibdoc.bibrec_links[0]["recid"]
if recid:
for bibdocfile in bibdoc.list_all_files():
this_version = bibdocfile.get_version()
this_format = bibdocfile.get_format()
if this_version in versions:
if docid not in documents_to_be_unhidden:
documents_to_be_unhidden[docid] = []
documents_to_be_unhidden[docid].append((this_version, this_format))
to_be_fixed.add(recid)
print '%s (docid: %s, recid: %s) will be unhidden' % (bibdocfile.get_full_name(), docid, recid)
wait_for_user('Proceeding to unhide the matched documents...')
for docid, documents in documents_to_be_unhidden.iteritems():
bibdoc = BibDoc.create_instance(docid)
for version, docformat in documents:
bibdoc.unset_flag('HIDDEN', docformat, version)
return cli_fix_marc(options, to_be_fixed)
def main():
parser = prepare_option_parser()
(options, args) = parser.parse_args()
if getattr(options, 'debug', None):
getLogger().setLevel(DEBUG)
debug('test')
debug('options: %s, args: %s' % (options, args))
try:
if not getattr(options, 'action', None) and \
not getattr(options, 'append_path', None) and \
not getattr(options, 'revise_path', None):
if getattr(options, 'set_doctype', None) is not None or \
getattr(options, 'set_comment', None) is not None or \
getattr(options, 'set_description', None) is not None or \
getattr(options, 'set_restriction', None) is not None:
cli_set_batch(options)
elif getattr(options, 'new_docname', None):
cli_rename(options)
else:
print >> sys.stderr, "ERROR: no action specified"
sys.exit(1)
elif getattr(options, 'append_path', None):
options.empty_recs = 'yes'
options.empty_docs = 'yes'
cli_append(options, getattr(options, 'append_path', None))
elif getattr(options, 'revise_path', None):
cli_revise(options, getattr(options, 'revise_path', None))
elif options.action == 'textify':
cli_textify(options)
elif getattr(options, 'action', None) == 'get-history':
cli_get_history(options)
elif getattr(options, 'action', None) == 'get-info':
cli_get_info(options)
elif getattr(options, 'action', None) == 'get-disk-usage':
cli_get_disk_usage(options)
elif getattr(options, 'action', None) == 'check-md5':
cli_check_md5(options)
elif getattr(options, 'action', None) == 'update-md5':
cli_update_md5(options)
elif getattr(options, 'action', None) == 'fix-all':
cli_fix_all(options)
elif getattr(options, 'action', None) == 'fix-marc':
cli_fix_marc(options)
elif getattr(options, 'action', None) == 'delete':
cli_delete(options)
elif getattr(options, 'action', None) == 'hard-delete':
cli_delete_file(options)
elif getattr(options, 'action', None) == 'fix-duplicate-docnames':
cli_fix_duplicate_docnames(options)
elif getattr(options, 'action', None) == 'fix-format':
cli_fix_format(options)
elif getattr(options, 'action', None) == 'check-duplicate-docnames':
cli_check_duplicate_docnames(options)
elif getattr(options, 'action', None) == 'check-format':
cli_check_format(options)
elif getattr(options, 'action', None) == 'undelete':
cli_undelete(options)
elif getattr(options, 'action', None) == 'purge':
cli_purge(options)
elif getattr(options, 'action', None) == 'expunge':
cli_expunge(options)
elif getattr(options, 'action', None) == 'revert':
cli_revert(options)
elif getattr(options, 'action', None) == 'hide':
cli_hide(options)
elif getattr(options, 'action', None) == 'unhide':
cli_unhide(options)
elif getattr(options, 'action', None) == 'fix-bibdocfsinfo-cache':
options.empty_docs = 'yes'
cli_fix_bibdocfsinfo_cache(options)
elif getattr(options, 'action', None) == 'get-stats':
cli_get_stats(options)
else:
print >> sys.stderr, "ERROR: Action %s is not valid" % getattr(options, 'action', None)
sys.exit(1)
except Exception, e:
register_exception()
print >> sys.stderr, 'ERROR: %s' % e
sys.exit(1)
# Script entry point: run the CLI when executed directly.
if __name__ == '__main__':
    main()
diff --git a/invenio/legacy/bibdocfile/icon_migration_kit.py b/invenio/legacy/bibdocfile/icon_migration_kit.py
index 56c4bed29..e82fe2dca 100644
--- a/invenio/legacy/bibdocfile/icon_migration_kit.py
+++ b/invenio/legacy/bibdocfile/icon_migration_kit.py
@@ -1,163 +1,163 @@
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This script updates the filesystem and database structure WRT icons.
In particular it will move all the icons information out of bibdoc_bibdoc
tables and into the normal bibdoc + subformat infrastructure.
"""
import sys
from datetime import datetime
from invenio.utils.text import wrap_text_in_a_box, wait_for_user
from invenio.bibtask import check_running_process_user
from invenio.dbquery import run_sql, OperationalError
from invenio.bibdocfile import BibDoc
from invenio.config import CFG_LOGDIR, CFG_SITE_SUPPORT_EMAIL
from invenio.bibdocfilecli import cli_fix_marc
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.intbitset import intbitset
from invenio.search_engine import record_exists
def retrieve_bibdoc_bibdoc():
    """Return all (id_bibdoc1, id_bibdoc2) rows from the bibdoc_bibdoc table."""
    return run_sql('SELECT id_bibdoc1, id_bibdoc2 from bibdoc_bibdoc')
def get_recid_from_docid(docid):
    """Return the (id_bibrec,) rows of records the given docid is attached to."""
    return run_sql('SELECT id_bibrec FROM bibrec_bibdoc WHERE id_bibdoc=%s', (docid, ))
def backup_tables(drop=False):
    """Create a backup (bibdoc_bibdoc_backup_for_icon) of the bibdoc_bibdoc table.

    Note: despite what the surrounding migration deals with, only the
    bibdoc_bibdoc table is actually backed up here.

    :param drop: when True, drop a previously created backup table first.
    :return: False when the backup could not be created because a previous
        backup already exists (and ``drop`` was False) -- i.e. dropping is
        needed; True on success.
    """
    if drop:
        run_sql('DROP TABLE bibdoc_bibdoc_backup_for_icon')
    try:
        run_sql("""CREATE TABLE bibdoc_bibdoc_backup_for_icon (KEY id_bibdoc1(id_bibdoc1),
KEY id_bibdoc2(id_bibdoc2)) SELECT * FROM bibdoc_bibdoc""")
    except OperationalError, e:
        # CREATE TABLE failed: if we did not just drop the old backup, the
        # most likely cause is a leftover backup from a previous run.
        if not drop:
            return False
        raise e
    return True
def fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
    """
    Migrate an icon.

    Attach the latest files of the stand-alone icon bibdoc (id_bibdoc2) as
    icons of the target bibdoc (id_bibdoc1), delete the icon bibdoc and
    remove the link row from bibdoc_bibdoc.

    :param id_bibdoc1: id of the document that should own the icon
    :param id_bibdoc2: id of the legacy stand-alone icon document
    :param logfile: open file object where progress and errors are logged
    :return: True on success -- also when the target document cannot even
        be opened (that case is only reported as a warning so the caller
        moves on); False when the migration itself fails.
    """
    try:
        the_bibdoc = BibDoc.create_instance(id_bibdoc1)
    except Exception, err:
        # Broken legacy documents may fail to open: warn and report success
        # so the caller continues with the next pair.
        msg = "WARNING: when opening docid %s: %s" % (id_bibdoc1, err)
        print >> logfile, msg
        print msg
        return True
    try:
        msg = "Fixing icon for the document %s" % (id_bibdoc1, )
        # Trailing commas: keep "OK"/"ERROR" on the same output line.
        print msg,
        print >> logfile, msg,
        the_icon = BibDoc.create_instance(id_bibdoc2)
        for a_file in the_icon.list_latest_files():
            the_bibdoc.add_icon(a_file.get_full_path(), format=a_file.get_format())
        the_icon.delete()
        run_sql("DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s AND id_bibdoc2=%s", (id_bibdoc1, id_bibdoc2))
        print "OK"
        print >> logfile, "OK"
        return True
    except Exception, err:
        print "ERROR: %s" % err
        print >> logfile, "ERROR: %s" % err
        register_exception()
        return False
def main():
    """Core loop: migrate every icon listed in bibdoc_bibdoc.

    Backs up the bibdoc_bibdoc table, migrates each icon pair with
    fix_bibdoc_bibdoc() (logging to a timestamped file in CFG_LOGDIR) and,
    whatever happens, finally schedules a FIX-MARC run for every record
    that was touched.
    """
    check_running_process_user()
    logfilename = '%s/fulltext_files_migration_kit-%s.log' % (CFG_LOGDIR, datetime.today().strftime('%Y%m%d%H%M%S'))
    try:
        logfile = open(logfilename, 'w')
    except IOError, e:
        print wrap_text_in_a_box('NOTE: it\'s impossible to create the log:\n\n %s\n\nbecause of:\n\n %s\n\nPlease run this migration kit as the same user who runs Invenio (e.g. Apache)' % (logfilename, e), style='conclusion', break_long=False)
        sys.exit(1)
    bibdoc_bibdoc = retrieve_bibdoc_bibdoc()
    print wrap_text_in_a_box ("""This script migrate the filesystem structure used to store icons files to the new stricter structure.
This script must not be run during normal Invenio operations.
It is safe to run this script. No file will be deleted.
Anyway it is recommended to run a backup of the filesystem structure just in case.
A backup of the database tables involved will be automatically performed.""", style='important')
    if not bibdoc_bibdoc:
        print wrap_text_in_a_box("No need for migration", style='conclusion')
        return
    print "%s icons will be migrated/fixed." % len(bibdoc_bibdoc)
    wait_for_user()
    print "Backing up database tables"
    try:
        if not backup_tables():
            # A backup from a previous run is in the way: confirm, drop it
            # and back up again.
            print wrap_text_in_a_box("""It appears that is not the first time that you run this script.
Backup tables have been already created by a previous run.
In order for the script to go further they need to be removed.""", style='important')
            wait_for_user()
            print "Backing up database tables (after dropping previous backup)",
            backup_tables(drop=True)
            print "-> OK"
        else:
            print "-> OK"
    except Exception, e:
        print wrap_text_in_a_box("Unexpected error while backing up tables. Please, do your checks: %s" % e, style='conclusion')
        sys.exit(1)
    to_fix_marc = intbitset()
    print "Created a complete log file into %s" % logfilename
    try:
        try:
            for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc:
                try:
                    record_does_exist = True
                    recids = get_recid_from_docid(id_bibdoc1)
                    if not recids:
                        print "Skipping %s" % id_bibdoc1
                        continue
                    for recid in recids:
                        if record_exists(recid[0]) > 0:
                            to_fix_marc.add(recid[0])
                        else:
                            # The attached record is gone: a migration failure
                            # for this pair is tolerated below.
                            record_does_exist = False
                    if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
                        if record_does_exist:
                            raise StandardError("Error when correcting document ID %s" % id_bibdoc1)
                except Exception, err:
                    # Per-pair errors are logged and the loop continues.
                    print >> logfile, "ERROR: %s" % err
            print wrap_text_in_a_box("DONE", style='conclusion')
        except:
            logfile.close()
            register_exception()
            print wrap_text_in_a_box(
                title = "INTERRUPTED BECAUSE OF ERROR!",
                body = """Please see the log file %s for what was the status prior to the error. Contact %s in case of problems, attaching the log.""" % (logfilename, CFG_SITE_SUPPORT_EMAIL),
                style = 'conclusion')
            sys.exit(1)
    finally:
        # Runs even on error/exit: keep MARCXML in sync for touched records.
        print "Scheduling FIX-MARC to synchronize MARCXML for updated records."
        cli_fix_marc(options={}, explicit_recid_set=to_fix_marc)
# Script entry point: run the migration when executed directly.
if __name__ == '__main__':
    main()
diff --git a/invenio/legacy/bibdocfile/managedocfiles.py b/invenio/legacy/bibdocfile/managedocfiles.py
index ca364f117..4a859ca31 100644
--- a/invenio/legacy/bibdocfile/managedocfiles.py
+++ b/invenio/legacy/bibdocfile/managedocfiles.py
@@ -1,2938 +1,2938 @@
## $Id: Revise_Files.py,v 1.37 2009/03/26 15:11:05 jerome Exp $
## This file is part of Invenio.
## Copyright (C) 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibDocFile Upload File Interface utils
=====================================
Tools to help with creation of file management interfaces.
Contains the two main functions `create_file_upload_interface' and
`move_uploaded_files_to_storage', which must be run one after the
other:
- create_file_upload_interface: Generates the HTML of an interface to
revise files of a given record. The actions on the files are
recorded in a working directory, but not applied to the record.
- move_uploaded_files_to_storage: Applies/executes the modifications
on files as recorded by the `create_file_upload_interface'
function.
Theses functions are a complex interplay of HTML, Javascript and HTTP
requests. They are not meant to be used in any type of scenario, but
require to be used in extremely specific contexts (Currently in
WebSubmit Response Elements, WebSubmit functions and the BibDocFile
File Management interface).
NOTES:
======
- Comments are not considered as a property of bibdocfiles, but
bibdocs: this conflicts with the APIs
FIXME:
======
- refactor into smaller components. Eg. form processing in
create_file_upload_interface could be move outside the function.
- better differentiate between revised file, and added format
(currently when adding a format, the whole bibdoc is marked as
updated, and all links are removed)
- After a file has been revised or added, add a 'check' icon
- One issue: if we allow deletion or renaming, we might lose track of
a bibdoc: someone adds X, renames X->Y, and adds again another file
with name X: when executing actions, we will add the second X, and
rename it to Y
-> need to go back in previous action when renaming... or check
that name has never been used..
DEPENDENCIES:
=============
- jQuery Form plugin U{http://jquery.malsup.com/form/}
"""
import cPickle
import os
import time
import cgi
from urllib import urlencode
from invenio.config import \
CFG_SITE_LANG, \
CFG_SITE_URL, \
CFG_WEBSUBMIT_STORAGEDIR, \
CFG_TMPSHAREDDIR, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_CERN_SITE, \
CFG_SITE_RECORD
from invenio.base.i18n import gettext_set_language
from invenio.bibdocfilecli import cli_fix_marc
from invenio.bibdocfile import BibRecDocs, \
decompose_file, calculate_md5, BibDocFile, \
InvenioBibDocFileError, BibDocMoreInfo
from invenio.legacy.websubmit.functions.Shared_Functions import \
createRelatedFormats
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.dbquery import run_sql
from invenio.websubmit_icon_creator import \
create_icon, InvenioWebSubmitIconCreatorError
from invenio.utils.url import create_html_mailto
from invenio.htmlutils import escape_javascript_string
from invenio.bibdocfile_config import CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT
CFG_ALLOWED_ACTIONS = ['revise', 'delete', 'add', 'addFormat']
params_id = 0
def create_file_upload_interface(recid,
form=None,
print_outside_form_tag=True,
print_envelope=True,
include_headers=False,
ln=CFG_SITE_LANG,
minsize='', maxsize='',
doctypes_and_desc=None,
can_delete_doctypes=None,
can_revise_doctypes=None,
can_describe_doctypes=None,
can_comment_doctypes=None,
can_keep_doctypes=None,
can_rename_doctypes=None,
can_add_format_to_doctypes=None,
create_related_formats=True,
can_name_new_files=True,
keep_default=True, show_links=True,
file_label=None, filename_label=None,
description_label=None, comment_label=None,
restrictions_and_desc=None,
can_restrict_doctypes=None,
restriction_label=None,
doctypes_to_default_filename=None,
max_files_for_doctype=None,
sbm_indir=None, sbm_doctype=None, sbm_access=None,
uid=None, sbm_curdir=None,
display_hidden_files=False, protect_hidden_files=True):
"""
Returns the HTML for the file upload interface.
@param recid: the id of the record to edit files
@type recid: int or None
@param form: the form sent by the user's browser in response to a
user action. This is used to read and record user's
actions.
@param form: as returned by the interface handler.
@param print_outside_form_tag: display encapsulating <form> tag or
not
@type print_outside_form_tag: boolean
@param print_envelope: (internal parameter) if True, return the
encapsulating initial markup, otherwise
skip it.
@type print_envelope: boolean
@param include_headers: include javascript and css headers in the
body of the page. If you set this to
False, you must take care of including
these headers in your page header. Setting
this parameter to True is useful if you
cannot change the page header.
@type include_headers: boolean
@param ln: language
@type ln: string
@param minsize: the minimum size (in bytes) allowed for the
uploaded files. Files not big enough are
discarded.
@type minsize: int
@param maxsize: the maximum size (in bytes) allowed for the
uploaded files. Files too big are discarded.
@type maxsize: int
@param doctypes_and_desc: the list of doctypes (like 'Main' or
'Additional') and their description that users
can choose from when adding new files.
- When no value is provided, users cannot add new
file (they can only revise/delete/add format)
- When a single value is given, it is used as
default doctype for all new documents
Order is relevant
Eg:
[('main', 'Main document'), ('additional', 'Figure, schema. etc')]
@type doctypes_and_desc: list(tuple(string, string))
@param restrictions_and_desc: the list of restrictions (like 'Restricted' or
'No Restriction') and their description that
users can choose from when adding or revising
files. Restrictions can then be configured at
the level of WebAccess.
- When no value is provided, no restriction is
applied
- When a single value is given, it is used as
default resctriction for all documents.
- The first value of the list is used as default
restriction if the user if not given the
choice of the restriction. Order is relevant
Eg:
[('', 'No restriction'), ('restr', 'Restricted')]
@type restrictions_and_desc: list(tuple(string, string))
@param can_delete_doctypes: the list of doctypes that users are
allowed to delete.
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_delete_doctypes: list(string)
@param can_revise_doctypes: the list of doctypes that users are
allowed to revise
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_revise_doctypes: list(string)
@param can_describe_doctypes: the list of doctypes that users are
allowed to describe
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_describe_doctypes: list(string)
@param can_comment_doctypes: the list of doctypes that users are
allowed to comment
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_comment_doctypes: list(string)
@param can_keep_doctypes: the list of doctypes for which users can
choose to keep previous versions visible when
revising a file (i.e. 'Keep previous version'
checkbox). See also parameter 'keepDefault'.
Note that this parameter is ~ignored when
revising the attributes of a file (comment,
description) without uploading a new
file. See also parameter
Move_Uploaded_Files_to_Storage.force_file_revision
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_keep_doctypes: list(string)
@param can_add_format_to_doctypes: the list of doctypes for which users can
add new formats. If there is no value,
then no 'add format' link nor warning
about losing old formats are displayed.
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_add_format_to_doctypes: list(string)
@param can_restrict_doctypes: the list of doctypes for which users can
choose the access restrictions when adding or
revising a file. If no value is given:
- no restriction is applied if none is defined
in the 'restrictions' parameter.
- else the *first* value of the 'restrictions'
parameter is used as default restriction.
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_restrict_doctypes : list(string)
@param can_rename_doctypes: the list of doctypes that users are allowed
to rename (when revising)
Eg: ['main', 'additional']
Use ['*'] for "all doctypes"
@type can_rename_doctypes: list(string)
@param can_name_new_files: if user can choose the name of the files they
upload or not
@type can_name_new_files: boolean
@param doctypes_to_default_filename: Rename uploaded files to admin-chosen
values. To rename to a value found in a file in curdir,
use 'file:' prefix to specify the file to read from.
Eg:
{'main': 'file:RN', 'additional': 'foo'}
If the same doctype is submitted
several times, a"-%i" suffix is added
to the name defined in the file.
When using 'file:' prefix, the name
is only resolved at the end of the
submission, when attaching the file.
The default filenames are overriden
by user-chosen names if you allow
'can_name_new_files' or
'can_rename_doctypes', excepted if the
name is prefixed with 'file:'.
@type doctypes_to_default_filename: dict
@param max_files_for_doctype: the maximum number of files that users can
upload for each doctype.
Eg: {'main': 1, 'additional': 2}
Do not specify the doctype here to have an
unlimited number of files for a given
doctype.
@type max_files_for_doctype: dict
@param create_related_formats: if uploaded files get converted to
whatever format we can or not
@type create_related_formats: boolean
@param keep_default: the default behaviour for keeping or not previous
version of files when users cannot choose (no
value in can_keep_doctypes).
Note that this parameter is ignored when revising
the attributes of a file (comment, description)
without uploading a new file. See also parameter
Move_Uploaded_Files_to_Storage.force_file_revision
@type keep_default: boolean
@param show_links: if we display links to files when possible or
not
@type show_links: boolean
@param file_label: the label for the file field
@type file_label: string
@param filename_label: the label for the file name field
@type filename_label: string
@param description_label: the label for the description field
@type description_label: string
@param comment_label: the label for the comments field
@type comment_label: string
@param restriction_label: the label in front of the restrictions list
@type restriction_label: string
@param sbm_indir: the submission indir parameter, in case the
function is used in a WebSubmit submission
context.
This value will be used to retrieve where to
read the current state of the interface and
store uploaded files
@type sbm_indir : string
@param sbm_doctype: the submission doctype parameter, in case the
function is used in a WebSubmit submission
context.
This value will be used to retrieve where to
read the current state of the interface and
store uploaded files
@type sbm_doctype: string
@param sbm_access: the submission access parameter. Must be
specified in the context of WebSubmit
submission, as well when used in the
WebSubmit Admin file management interface.
This value will be used to retrieve where to
read the current state of the interface and
store uploaded files
@type sbm_access: string
@param sbm_curdir: the submission curdir parameter. Must be
specified in the context of WebSubmit
function Create_Upload_File_Interface.
This value will be used to retrieve where to
read the current state of the interface and
store uploaded files.
@type sbm_curdir: string
@param uid: the user id
@type uid: int
@param display_hidden_files: if bibdoc containing bibdocfiles
flagged as 'HIDDEN' should be
displayed or not.
@type display_hidden_files: boolean
@param protect_hidden_files: if bibdoc containing bibdocfiles
flagged as 'HIDDEN' can be edited
(revise, delete, add format) or not.
@type protect_hidden_files: boolean
@return Tuple (errorcode, html)
"""
# Clean and set up a few parameters
_ = gettext_set_language(ln)
body = ''
if not file_label:
file_label = _('Choose a file')
if not filename_label:
filename_label = _('Name')
if not description_label:
description_label = _('Description')
if not comment_label:
comment_label = _('Comment')
if not restriction_label:
restriction_label = _('Access')
if not doctypes_and_desc:
doctypes_and_desc = []
if not can_delete_doctypes:
can_delete_doctypes = []
if not can_revise_doctypes:
can_revise_doctypes = []
if not can_describe_doctypes:
can_describe_doctypes = []
if not can_comment_doctypes:
can_comment_doctypes = []
if not can_keep_doctypes:
can_keep_doctypes = []
if not can_rename_doctypes:
can_rename_doctypes = []
if not can_add_format_to_doctypes:
can_add_format_to_doctypes = []
if not restrictions_and_desc:
restrictions_and_desc = []
if not can_restrict_doctypes:
can_restrict_doctypes = []
if not doctypes_to_default_filename:
doctypes_to_default_filename = {}
if not max_files_for_doctype:
max_files_for_doctype = {}
doctypes = [doctype for (doctype, desc) in doctypes_and_desc]
# Retrieve/build a working directory to save uploaded files and
# states + configuration.
working_dir = None
if sbm_indir and sbm_doctype and sbm_access:
# Write/read configuration to/from working_dir (WebSubmit mode).
# Retrieve the interface configuration from the current
# submission directory.
working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
sbm_indir,
sbm_doctype,
sbm_access)
try:
assert(working_dir == os.path.abspath(working_dir))
except AssertionError:
register_exception(prefix='Cannot create file upload interface: ' + \
+ 'missing parameter ',
alert_admin=True)
return (1, "Unauthorized parameters")
form_url_params = "?" + urlencode({'access': sbm_access,
'indir': sbm_indir,
'doctype': sbm_doctype})
elif uid and sbm_access:
# WebSubmit File Management (admin) interface mode.
# Working directory is in CFG_TMPSHAREDDIR
working_dir = os.path.join(CFG_TMPSHAREDDIR,
'websubmit_upload_interface_config_' + str(uid),
sbm_access)
try:
assert(working_dir == os.path.abspath(working_dir))
except AssertionError:
register_exception(prefix='Some user tried to access ' \
+ working_dir + \
' which is different than ' + \
os.path.abspath(working_dir),
alert_admin=True)
return (1, "Unauthorized parameters")
if not os.path.exists(working_dir):
os.makedirs(working_dir)
form_url_params = "?" + urlencode({'access': sbm_access})
elif sbm_curdir:
# WebSubmit Create_Upload_File_Interface.py function
working_dir = sbm_curdir
form_url_params = None
else:
register_exception(prefix='Some user tried to access ' \
+ working_dir + \
' which is different than ' + \
os.path.abspath(working_dir),
alert_admin=True)
return (1, "Unauthorized parameters")
# Save interface configuration, if this is the first time we come
# here, or else load parameters
try:
parameters = _read_file_revision_interface_configuration_from_disk(working_dir)
(minsize, maxsize, doctypes_and_desc, doctypes,
can_delete_doctypes, can_revise_doctypes,
can_describe_doctypes,
can_comment_doctypes, can_keep_doctypes,
can_rename_doctypes,
can_add_format_to_doctypes, create_related_formats,
can_name_new_files, keep_default, show_links,
file_label, filename_label, description_label,
comment_label, restrictions_and_desc,
can_restrict_doctypes,
restriction_label, doctypes_to_default_filename,
max_files_for_doctype, print_outside_form_tag,
display_hidden_files, protect_hidden_files) = parameters
except:
# Initial display of the interface: save configuration to
# disk for later reuse
parameters = (minsize, maxsize, doctypes_and_desc, doctypes,
can_delete_doctypes, can_revise_doctypes,
can_describe_doctypes,
can_comment_doctypes, can_keep_doctypes,
can_rename_doctypes,
can_add_format_to_doctypes, create_related_formats,
can_name_new_files, keep_default, show_links,
file_label, filename_label, description_label,
comment_label, restrictions_and_desc,
can_restrict_doctypes,
restriction_label, doctypes_to_default_filename,
max_files_for_doctype, print_outside_form_tag,
display_hidden_files, protect_hidden_files)
_write_file_revision_interface_configuration_to_disk(working_dir, parameters)
# Get the existing bibdocs as well as the actions performed during
# the former revise sessions of the user, to build an updated list
# of documents. We will use it to check if last action performed
# by user is allowed.
performed_actions = read_actions_log(working_dir)
if recid:
bibrecdocs = BibRecDocs(recid)
# Create the list of files based on current files and performed
# actions
bibdocs = bibrecdocs.list_bibdocs()
else:
bibdocs = []
# "merge":
abstract_bibdocs = build_updated_files_list(bibdocs,
performed_actions,
recid or -1,
display_hidden_files)
# If any, process form submitted by user
if form:
## Get and clean parameters received from user
(file_action, file_target, file_target_doctype,
keep_previous_files, file_description, file_comment, file_rename,
file_doctype, file_restriction, uploaded_filename, uploaded_filepath) = \
wash_form_parameters(form, abstract_bibdocs, can_keep_doctypes,
keep_default, can_describe_doctypes, can_comment_doctypes,
can_rename_doctypes, can_name_new_files, can_restrict_doctypes,
doctypes_to_default_filename, working_dir)
if protect_hidden_files and \
(file_action in ['revise', 'addFormat', 'delete']) and \
is_hidden_for_docname(file_target, abstract_bibdocs):
# Sanity check. We should not let editing
file_action = ''
body += '<script>alert("%s");</script>' % \
_("The file you want to edit is protected against modifications. Your action has not been applied")
## Check the last action performed by user, and log it if
## everything is ok
if uploaded_filepath and \
((file_action == 'add' and (file_doctype in doctypes)) or \
(file_action == 'revise' and \
((file_target_doctype in can_revise_doctypes) or \
'*' in can_revise_doctypes)) or
(file_action == 'addFormat' and \
((file_target_doctype in can_add_format_to_doctypes) or \
'*' in can_add_format_to_doctypes))):
# A file has been uploaded (user has revised or added a file,
# or a format)
dirname, filename, extension = decompose_file(uploaded_filepath)
os.unlink("%s/myfile" % working_dir)
if minsize.isdigit() and os.path.getsize(uploaded_filepath) < int(minsize):
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("The uploaded file is too small (<%i o) and has therefore not been considered") % \
int(minsize)).replace('"', '\\"')
elif maxsize.isdigit() and os.path.getsize(uploaded_filepath) > int(maxsize):
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("The uploaded file is too big (>%i o) and has therefore not been considered") % \
int(maxsize)).replace('"', '\\"')
elif len(filename) + len(extension) + 4 > 255:
# Max filename = 256, including extension and version that
# will be appended later by BibDoc
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
_("The uploaded file name is too long and has therefore not been considered").replace('"', '\\"')
elif file_action == 'add' and \
max_files_for_doctype.has_key(file_doctype) and \
max_files_for_doctype[file_doctype] < \
(len([bibdoc for bibdoc in abstract_bibdocs \
if bibdoc['get_type'] == file_doctype]) + 1):
# User has tried to upload more than allowed for this
# doctype. Should never happen, unless the user did some
# nasty things
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
_("You have already reached the maximum number of files for this type of document").replace('"', '\\"')
else:
# Prepare to move file to
# working_dir/files/updated/doctype/bibdocname/
folder_doctype = file_doctype or \
bibrecdocs.get_bibdoc(file_target).get_type()
folder_bibdocname = file_rename or file_target or filename
new_uploaded_filepath = os.path.join(working_dir, 'files', 'updated',
folder_doctype,
folder_bibdocname, uploaded_filename)
# First check that we do not conflict with an already
# existing bibdoc name
if file_action == "add" and \
((filename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs] and not file_rename) or \
file_rename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs]):
# A file with that name already exist. Cancel action
# and tell user.
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("A file named %s already exists. Please choose another name.") % \
(file_rename or filename)).replace('"', '\\"')
elif file_action == "revise" and \
file_rename != file_target and \
file_rename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs]:
# A file different from the one to revise already has
# the same bibdocname
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("A file named %s already exists. Please choose another name.") % \
file_rename).replace('"', '\\"')
elif file_action == "addFormat" and \
(extension in \
get_extensions_for_docname(file_target,
abstract_bibdocs)):
# A file with that extension already exists. Cancel
# action and tell user.
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
(_("A file with format '%s' already exists. Please upload another format.") % \
extension).replace('"', '\\"')
elif '.' in file_rename or '/' in file_rename or "\\" in file_rename or \
not os.path.abspath(new_uploaded_filepath).startswith(os.path.join(working_dir, 'files', 'updated')):
# We forbid usage of a few characters, for the good of
# everybody...
os.unlink(uploaded_filepath)
body += '<script>alert("%s");</script>' % \
_("You are not allowed to use dot '.', slash '/', or backslash '\\\\' in file names. Choose a different name and upload your file again. In particular, note that you should not include the extension in the renaming field.").replace('"', '\\"')
else:
# No conflict with file name
# When revising, delete previously uploaded files for
# this entry, so that we do not execute the
# corresponding action
if file_action == "revise":
for path_to_delete in \
get_uploaded_files_for_docname(working_dir, file_target):
delete_file(working_dir, path_to_delete)
# Move uploaded file to working_dir/files/updated/doctype/bibdocname/
os.renames(uploaded_filepath, new_uploaded_filepath)
if file_action == "add":
# no need to check bibrecdocs.check_file_exists(new_uploaded_filepath, new_uploaded_format): was done before
# Log
if file_rename != '':
# at this point, bibdocname is specified
# name, no need to 'rename'
filename = file_rename
log_action(working_dir, file_action, filename,
new_uploaded_filepath, file_rename,
file_description, file_comment,
file_doctype, keep_previous_files,
file_restriction)
# Automatically create additional formats when
# possible.
additional_formats = []
if create_related_formats:
additional_formats = createRelatedFormats(new_uploaded_filepath,
overwrite=False)
for additional_format in additional_formats:
# Log
log_action(working_dir, 'addFormat', filename,
additional_format, file_rename,
file_description, file_comment,
file_doctype, True, file_restriction)
if file_action == "revise" and file_target != "":
# Log
log_action(working_dir, file_action, file_target,
new_uploaded_filepath, file_rename,
file_description, file_comment,
file_target_doctype, keep_previous_files,
file_restriction)
# Automatically create additional formats when
# possible.
additional_formats = []
if create_related_formats:
additional_formats = createRelatedFormats(new_uploaded_filepath,
overwrite=False)
for additional_format in additional_formats:
# Log
log_action(working_dir, 'addFormat',
(file_rename or file_target),
additional_format, file_rename,
file_description, file_comment,
file_target_doctype, True,
file_restriction)
if file_action == "addFormat" and file_target != "":
# We have already checked above that this format does
# not already exist.
# Log
log_action(working_dir, file_action, file_target,
new_uploaded_filepath, file_rename,
file_description, file_comment,
file_target_doctype, keep_previous_files,
file_restriction)
elif file_action in ["add", "addFormat"]:
# No file found, but action involved adding file: ask user to
# select a file
body += """<script>
alert("You did not specify a file. Please choose one before uploading.");
</script>"""
elif file_action == "revise" and file_target != "":
# User has chosen to revise attributes of a file (comment,
# name, etc.) without revising the file itself.
if file_rename != file_target and \
file_rename in [bibdoc['get_docname'] for bibdoc \
in abstract_bibdocs]:
# A file different from the one to revise already has
# the same bibdocname
body += '<script>alert("%s");</script>' % \
(_("A file named %s already exists. Please choose another name.") % \
file_rename).replace('"', '\\"')
elif file_rename != file_target and \
('.' in file_rename or '/' in file_rename or "\\" in file_rename):
# We forbid usage of a few characters, for the good of
# everybody...
body += '<script>alert("%s");</script>' % \
_("You are not allowed to use dot '.', slash '/', or backslash '\\\\' in file names. Choose a different name and upload your file again. In particular, note that you should not include the extension in the renaming field.").replace('"', '\\"')
else:
# Log
log_action(working_dir, file_action, file_target,
"", file_rename,
file_description, file_comment,
file_target_doctype, keep_previous_files,
file_restriction)
elif file_action == "delete" and file_target != "" and \
((file_target_doctype in can_delete_doctypes) or \
'*' in can_delete_doctypes):
# Delete previously uploaded files for this entry
for path_to_delete in get_uploaded_files_for_docname(working_dir, file_target):
delete_file(working_dir, path_to_delete)
# Log
log_action(working_dir, file_action, file_target, "", file_rename,
file_description, file_comment, "",
keep_previous_files, file_restriction)
## Display
performed_actions = read_actions_log(working_dir)
#performed_actions = []
if recid:
bibrecdocs = BibRecDocs(recid)
# Create the list of files based on current files and performed
# actions
bibdocs = bibrecdocs.list_bibdocs()
else:
bibdocs = []
abstract_bibdocs = build_updated_files_list(bibdocs, performed_actions,
recid or -1, display_hidden_files)
abstract_bibdocs.sort(lambda x, y: x['order'] - y['order'])
# Display form and necessary CSS + Javscript
#body += '<div>'
#body += css
js_can_describe_doctypes = repr({}.fromkeys(can_describe_doctypes, ''))
js_can_comment_doctypes = repr({}.fromkeys(can_comment_doctypes, ''))
js_can_restrict_doctypes = repr({}.fromkeys(can_restrict_doctypes, ''))
# Prepare to display file revise panel "balloon". Check if we
# should display the list of doctypes or if it is not necessary (0
# or 1 doctype). Also make sure that we do not exceed the maximum
# number of files specified per doctype. The markup of the list of
# doctypes is prepared here, and will be passed as parameter to
# the display_revise_panel function
cleaned_doctypes = [doctype for doctype in doctypes if
not max_files_for_doctype.has_key(doctype) or
(max_files_for_doctype[doctype] > \
len([bibdoc for bibdoc in abstract_bibdocs \
if bibdoc['get_type'] == doctype]))]
doctypes_list = ""
if len(cleaned_doctypes) > 1:
doctypes_list = '<select id="fileDoctype" name="fileDoctype" onchange="var idx=this.selectedIndex;var doctype=this.options[idx].value;updateForm(doctype,'+','.join([js_can_describe_doctypes, js_can_comment_doctypes, js_can_restrict_doctypes])+');">' + \
'\n'.join(['<option value="' + cgi.escape(doctype, True) + '">' + \
cgi.escape(description) + '</option>' \
for (doctype, description) \
in doctypes_and_desc if \
doctype in cleaned_doctypes]) + \
'</select>'
elif len(cleaned_doctypes) == 1:
doctypes_list = '<input id="fileDoctype" name="fileDoctype" type="hidden" value="%s" />' % cleaned_doctypes[0]
# Check if we should display the list of access restrictions or if
# it is not necessary
restrictions_list = ""
if len(restrictions_and_desc) > 1:
restrictions_list = '<select id="fileRestriction" name="fileRestriction">' + \
'\n'.join(['<option value="' + cgi.escape(restriction, True) + '">' + \
cgi.escape(description) + '</option>' \
for (restriction, description) \
in restrictions_and_desc]) + \
'</select>'
restrictions_list = '''<label for="restriction">%(restriction_label)s:</label>&nbsp;%(restrictions_list)s&nbsp;<small>[<a href="" onclick="alert('%(restriction_help)s');return false;">?</a>]</small>''' % \
{'restrictions_list': restrictions_list,
'restriction_label': restriction_label,
'restriction_help': _('Choose how you want to restrict access to this file.').replace("'", "\\'")}
elif len(restrictions_and_desc) == 1:
restrictions_list = '<select style="display:none" id="fileRestriction" name="fileRestriction"><option value="%(restriction_attr)s">%(restriction)s</option></select>' % {
'restriction': cgi.escape(restrictions_and_desc[0][0]),
'restriction_attr': cgi.escape(restrictions_and_desc[0][0], True)
}
else:
restrictions_list = '<select style="display:none" id="fileRestriction" name="fileRestriction"></select>'
# List the files
body += '''
<div id="reviseControl">
<table class="reviseControlBrowser">'''
i = 0
for bibdoc in abstract_bibdocs:
if bibdoc['list_latest_files']:
i += 1
body += create_file_row(bibdoc, can_delete_doctypes,
can_rename_doctypes,
can_revise_doctypes,
can_describe_doctypes,
can_comment_doctypes,
can_keep_doctypes,
can_add_format_to_doctypes,
doctypes_list,
show_links,
can_restrict_doctypes,
even=not (i % 2),
ln=ln,
form_url_params=form_url_params,
protect_hidden_files=protect_hidden_files)
body += '</table>'
if len(cleaned_doctypes) > 0:
(revise_panel, javascript_prefix) = javascript_display_revise_panel(action='add', target='', show_doctypes=True, show_keep_previous_versions=False, show_rename=can_name_new_files, show_description=True, show_comment=True, bibdocname='', description='', comment='', show_restrictions=True, restriction=len(restrictions_and_desc) > 0 and restrictions_and_desc[0][0] or '', doctypes=doctypes_list)
body += '''%(javascript_prefix)s<input type="button" onclick="%(display_revise_panel)s;updateForm('%(defaultSelectedDoctype)s', %(can_describe_doctypes)s, %(can_comment_doctypes)s, %(can_restrict_doctypes)s);return false;" value="%(add_new_file)s"/>''' % \
{'display_revise_panel': revise_panel,
'javascript_prefix': javascript_prefix,
'defaultSelectedDoctype': escape_javascript_string(cleaned_doctypes[0], escape_quote_for_html=True),
'add_new_file': _("Add new file"),
'can_describe_doctypes':js_can_describe_doctypes,
'can_comment_doctypes': repr({}.fromkeys(can_comment_doctypes, '')),
'can_restrict_doctypes': repr({}.fromkeys(can_restrict_doctypes, ''))}
body += '</div>'
if print_envelope:
# We should print this only if we display for the first time
body = '<div id="uploadFileInterface">' + body + '</div>'
if include_headers:
body = get_upload_file_interface_javascript(form_url_params) + \
get_upload_file_interface_css() + \
body
# Display markup of the revision panel. This one is also
# printed only at the beginning, so that it does not need to
# be returned with each response
body += revise_balloon % \
{'CFG_SITE_URL': CFG_SITE_URL,
'file_label': file_label,
'filename_label': filename_label,
'description_label': description_label,
'comment_label': comment_label,
'restrictions': restrictions_list,
'previous_versions_help': _('You can decide to hide or not previous version(s) of this file.').replace("'", "\\'"),
'revise_format_help': _('When you revise a file, the additional formats that you might have previously uploaded are removed, since they no longer up-to-date with the new file.').replace("'", "\\'"),
'revise_format_warning': _('Alternative formats uploaded for current version of this file will be removed'),
'previous_versions_label': _('Keep previous versions'),
'cancel': _('Cancel'),
'upload': _('Upload'),
'uploading_label': _('Uploading...'),
'postprocess_label': _('Please wait...'),
'submit_or_button': form_url_params and 'button' or 'submit'}
body += '''
<input type="hidden" name="recid" value="%(recid)i"/>
<input type="hidden" name="ln" value="%(ln)s"/>
''' % \
{'recid': recid or -1,
'ln': ln}
# End submission button
if sbm_curdir:
body += '''<br /><div style="font-size:small">
<input type="button" class="adminbutton" name="Submit" id="applyChanges" value="%(apply_changes)s" onClick="nextStep();"></div>''' % \
{'apply_changes': _("Apply changes")}
# Display a link to support email in case users have problem
# revising/adding files
mailto_link = create_html_mailto(email=CFG_SITE_SUPPORT_EMAIL,
subject=_("Need help revising or adding files to record %(recid)s") % \
{'recid': recid or ''},
body=_("""Dear Support,
I would need help to revise or add a file to record %(recid)s.
I have attached the new version to this email.
Best regards""") % {'recid': recid or ''})
problem_revising = _('Having a problem revising a file? Send the revised version to %(mailto_link)s.') % {'mailto_link': mailto_link}
if len(cleaned_doctypes) > 0:
# We can add files, so change note
problem_revising = _('Having a problem adding or revising a file? Send the new/revised version to %(mailto_link)s.') % {'mailto_link': mailto_link}
body += '<br />'
body += problem_revising
if print_envelope and print_outside_form_tag:
body = '<form method="post" action="/%s/managedocfilesasync" id="uploadFileForm">' % CFG_SITE_RECORD + body + '</form>'
return (0, body)
def create_file_row(abstract_bibdoc, can_delete_doctypes,
                    can_rename_doctypes, can_revise_doctypes,
                    can_describe_doctypes, can_comment_doctypes,
                    can_keep_doctypes, can_add_format_to_doctypes,
                    doctypes_list, show_links, can_restrict_doctypes,
                    even=False, ln=CFG_SITE_LANG, form_url_params='',
                    protect_hidden_files=True):
    """
    Creates a row in the files list representing the given abstract_bibdoc.

    @param abstract_bibdoc: a "fake" BibDoc: a dictionary with keys
                            'list_latest_files' and 'get_docname' (among
                            others) whose values correspond to what you
                            would expect to receive when calling their
                            counterpart function on a real BibDoc object.
    @param can_delete_doctypes: list of doctypes for which we allow users
                                to delete documents
    @param can_revise_doctypes: the list of doctypes that users are
                                allowed to revise.
    @param can_describe_doctypes: the list of doctypes that users are
                                  allowed to describe.
    @param can_comment_doctypes: the list of doctypes that users are
                                 allowed to comment.
    @param can_keep_doctypes: the list of doctypes for which users can
                              choose to keep previous versions visible
                              when revising a file (i.e. 'Keep previous
                              version' checkbox).
    @param can_rename_doctypes: the list of doctypes that users are
                                allowed to rename (when revising)
    @param can_add_format_to_doctypes: the list of doctypes for which
                                       users can add new formats
    @param doctypes_list: markup of the doctypes dropdown, forwarded to
                          the revise panel
    @param show_links: if we display links to files
    @param even: if the row is even or odd on the list
    @type even: boolean
    @param ln: language
    @type ln: string
    @param form_url_params: additional URL parameters propagated by the
                            javascript actions of the row
    @type form_url_params: string
    @param protect_hidden_files: if bibdoc containing bibdocfiles
                                 flagged as 'HIDDEN' can be edited
                                 (revise, delete, add format) or not.
    @type protect_hidden_files: boolean
    @return: an HTML formatted "file" row
    @rtype: string
    """
    _ = gettext_set_language(ln)

    # Try to retrieve "main format", to display as link for the
    # file. There is no such concept in BibDoc, but let's just try to
    # get the pdf file if it exists
    main_bibdocfile = [bibdocfile for bibdocfile in abstract_bibdoc['list_latest_files']
                       if bibdocfile.get_format().strip('.').lower() == 'pdf']
    if len(main_bibdocfile) > 0:
        main_bibdocfile = main_bibdocfile[0]
    else:
        main_bibdocfile = abstract_bibdoc['list_latest_files'][0]

    main_bibdocfile_description = main_bibdocfile.get_description()
    if main_bibdocfile_description is None:
        main_bibdocfile_description = ''

    updated = abstract_bibdoc['updated'] # Has BibDoc been updated?
    hidden_p = abstract_bibdoc['hidden_p']

    # Main file row
    out = '<tr%s>' % (even and ' class="even"' or '')
    out += '<td class="reviseControlFileColumn"%s>' % (hidden_p and ' style="color:#99F"' or '')
    if not updated and show_links and not hidden_p:
        out += '<a target="_blank" href="' + main_bibdocfile.get_url() \
           + '">'
    out += cgi.escape(abstract_bibdoc['get_docname'])
    if hidden_p:
        out += ' <span style="font-size:small;font-style:italic;color:#888">(hidden)</span>'
    if not updated and show_links and not hidden_p:
        out += '</a>'
    if main_bibdocfile_description:
        out += ' (<em>' + cgi.escape(main_bibdocfile_description) + '</em>)'
    out += '</td>'

    (description, comment) = get_description_and_comment(abstract_bibdoc['list_latest_files'])
    restriction = abstract_bibdoc['get_status']

    # Revise link
    out += '<td class="reviseControlActionColumn">'
    # BUGFIX: parentheses are required around the doctype membership test.
    # Without them, 'and' binds tighter than 'or', so a doctype explicitly
    # listed in can_revise_doctypes bypassed the hidden-file protection,
    # contradicting the server-side sanity check performed before actions
    # are applied. Same fix applied to the delete and addFormat links below.
    if (main_bibdocfile.get_type() in can_revise_doctypes or
            '*' in can_revise_doctypes) and \
            not (hidden_p and protect_hidden_files):
        (revise_panel, javascript_prefix) = javascript_display_revise_panel(
            action='revise',
            target=abstract_bibdoc['get_docname'],
            show_doctypes=False,
            show_keep_previous_versions=(main_bibdocfile.get_type() in can_keep_doctypes) or '*' in can_keep_doctypes,
            show_rename=(main_bibdocfile.get_type() in can_rename_doctypes) or '*' in can_rename_doctypes,
            show_description=(main_bibdocfile.get_type() in can_describe_doctypes) or '*' in can_describe_doctypes,
            show_comment=(main_bibdocfile.get_type() in can_comment_doctypes) or '*' in can_comment_doctypes,
            bibdocname=abstract_bibdoc['get_docname'],
            description=description,
            comment=comment,
            show_restrictions=(main_bibdocfile.get_type() in can_restrict_doctypes) or '*' in can_restrict_doctypes,
            restriction=restriction,
            doctypes=doctypes_list)
        out += '%(javascript_prefix)s[<a href="" onclick="%(display_revise_panel)s;return false;">%(revise)s</a>]' % \
               {'display_revise_panel': revise_panel,
                'javascript_prefix': javascript_prefix,
                'revise': _("revise")
                }

    # Delete link
    if (main_bibdocfile.get_type() in can_delete_doctypes or
            '*' in can_delete_doctypes) and \
            not (hidden_p and protect_hidden_files):
        global params_id
        params_id += 1
        out += '''
        <script type="text/javascript">
        /*<![CDATA[*/
        var delete_panel_params_%(id)i = "%(bibdocname)s";
        /*]]>*/
        </script>
        [<a href="" onclick="return askDelete(delete_panel_params_%(id)i, '%(form_url_params)s')">%(delete)s</a>]
        ''' % {'bibdocname': escape_javascript_string(abstract_bibdoc['get_docname'], escape_for_html=False),
               'delete': _("delete"),
               'form_url_params': form_url_params or '',
               'id': params_id}

    out += '''</td>'''

    # Format row
    out += '''<tr%s>
    <td class="reviseControlFormatColumn"%s>
        <img src="%s/img/tree_branch.gif" alt="">
    ''' % (even and ' class="even"' or '', hidden_p and ' style="color:#999"' or '', CFG_SITE_URL)
    for bibdocfile in abstract_bibdoc['list_latest_files']:
        if not updated and show_links and not hidden_p:
            out += '<a target="_blank" href="' + bibdocfile.get_url() + '">'
        out += bibdocfile.get_format().strip('.')
        if not updated and show_links and not hidden_p:
            out += '</a>'
        out += ' '

    # Add format link
    out += '<td class="reviseControlActionColumn">'
    if (main_bibdocfile.get_type() in can_add_format_to_doctypes or
            '*' in can_add_format_to_doctypes) and \
            not (hidden_p and protect_hidden_files):
        (revise_panel, javascript_prefix) = javascript_display_revise_panel(
            action='addFormat',
            target=abstract_bibdoc['get_docname'],
            show_doctypes=False,
            show_keep_previous_versions=False,
            show_rename=False,
            show_description=False,
            show_comment=False,
            bibdocname='',
            description='',
            comment='',
            show_restrictions=False,
            restriction=restriction,
            doctypes=doctypes_list)
        out += '%(javascript_prefix)s[<a href="" onclick="%(display_revise_panel)s;return false;">%(add_format)s</a>]' % \
               {'display_revise_panel': revise_panel,
                'javascript_prefix': javascript_prefix,
                'add_format': _("add format")}
    out += '</td></tr>'
    return out
def build_updated_files_list(bibdocs, actions, recid, display_hidden_files=False):
    """
    Parses the list of BibDocs and builds an updated version to reflect
    the changes performed by the user on the files.

    It is necessary to abstract the BibDocs since the user wants to
    perform actions on the files that are committed only at the end of
    the session, while the interface must already display their effect.

    @param bibdocs: the original list of bibdocs on which we want to
                    build a new updated list
    @param actions: the list of actions performed by the user on the
                    files, and that we want to consider to build an
                    updated file list
    @param recid: the record ID to which the files belong
    @param display_hidden_files: if bibdoc containing bibdocfiles
                                 flagged as 'HIDDEN' should be
                                 displayed or not.
    @type display_hidden_files: boolean
    @return: the values of the abstract bibdocs mapping: dictionaries
             mimicking BibDoc (keys 'list_latest_files', 'get_docname',
             'get_type', 'get_status', 'updated', 'order', 'hidden_p')
    """
    abstract_bibdocs = {}
    i = 0
    # Lazily instantiated and shared across iterations: the previous code
    # created one BibRecDocs(recid) per bibdoc, i.e. one useless record
    # lookup per file, always for the same recid.
    brd = None
    for bibdoc in bibdocs:
        hidden_p = True in [bibdocfile.hidden_p() for bibdocfile in bibdoc.list_latest_files()]
        if CFG_CERN_SITE:
            hidden_p = False # Temporary workaround. See Ticket #846
        if not display_hidden_files and hidden_p:
            # Do not consider hidden files
            continue
        i += 1
        status = bibdoc.get_status()
        if status == "DELETED":
            status = ''
        if brd is None:
            brd = BibRecDocs(recid)
        abstract_bibdocs[brd.get_docname(bibdoc.id)] = \
            {'list_latest_files': bibdoc.list_latest_files(),
             'get_docname': brd.get_docname(bibdoc.id),
             'updated': False,
             'get_type': bibdoc.get_type(),
             'get_status': status,
             'order': i,
             'hidden_p': hidden_p}

    for action, bibdoc_name, file_path, rename, description, \
            comment, doctype, keep_previous_versions, \
            file_restriction in actions:
        dirname, filename, fileformat = decompose_file(file_path)
        i += 1
        if action in ["add", "revise"] and \
               os.path.exists(file_path):
            checksum = calculate_md5(file_path)
            order = i
            if action == "revise" and \
                   bibdoc_name in abstract_bibdocs:
                # Keep previous values
                order = abstract_bibdocs[bibdoc_name]['order']
                doctype = abstract_bibdocs[bibdoc_name]['get_type']
                if bibdoc_name.strip() == '' and rename.strip() == '':
                    # Neither an explicit name nor a rename: derive the
                    # bibdoc name from the uploaded filename (extension
                    # stripped)
                    bibdoc_name = os.path.extsep.join(filename.split(os.path.extsep)[:-1])
                elif rename.strip() != '' and \
                         bibdoc_name in abstract_bibdocs:
                    # Keep previous position
                    del abstract_bibdocs[bibdoc_name]

            # First instantiate a fake BibDocMoreInfo object, without any side effect
            more_info = BibDocMoreInfo(1, cache_only=False, initial_data={})
            if description is not None:
                more_info['descriptions'] = {1: {fileformat: description}}
            if comment is not None:
                more_info['comments'] = {1: {fileformat: comment}}
            abstract_bibdocs[(rename or bibdoc_name)] = \
                {'list_latest_files': [BibDocFile(file_path, [(int(recid), doctype, (rename or bibdoc_name))], version=1,
                                                  docformat=fileformat,
                                                  docid=-1,
                                                  status=file_restriction,
                                                  checksum=checksum,
                                                  more_info=more_info)],
                 'get_docname': rename or bibdoc_name,
                 'get_type': doctype,
                 'updated': True,
                 'get_status': file_restriction,
                 'order': order,
                 'hidden_p': False}
            abstract_bibdocs[(rename or bibdoc_name)]['updated'] = True
        elif action == "revise" and not file_path:
            # revision of attributes of a file (description, name,
            # comment or restriction) but no new file.
            # NOTE(review): assumes bibdoc_name is already present in
            # abstract_bibdocs — a malformed action log would raise
            # KeyError here (pre-existing behaviour, kept as-is).
            abstract_bibdocs[bibdoc_name]['get_docname'] = rename or bibdoc_name
            abstract_bibdocs[bibdoc_name]['get_status'] = file_restriction
            set_description_and_comment(abstract_bibdocs[bibdoc_name]['list_latest_files'],
                                        description, comment)
            abstract_bibdocs[bibdoc_name]['updated'] = True
        elif action == "delete":
            if bibdoc_name in abstract_bibdocs:
                del abstract_bibdocs[bibdoc_name]
        elif action == "addFormat" and \
                 os.path.exists(file_path):
            checksum = calculate_md5(file_path)
            # Preserve type and status
            doctype = abstract_bibdocs[bibdoc_name]['get_type']
            file_restriction = abstract_bibdocs[bibdoc_name]['get_status']
            # First instantiate a fake BibDocMoreInfo object, without any side effect
            more_info = BibDocMoreInfo(1, cPickle.dumps({}))
            if description is not None:
                more_info['descriptions'] = {1: {fileformat: description}}
            if comment is not None:
                more_info['comments'] = {1: {fileformat: comment}}
            abstract_bibdocs[bibdoc_name]['list_latest_files'].append(
                BibDocFile(file_path, [(int(recid), doctype, (rename or bibdoc_name))], version=1,
                           docformat=fileformat,
                           docid=-1, status='',
                           checksum=checksum, more_info=more_info))
            abstract_bibdocs[bibdoc_name]['updated'] = True

    return abstract_bibdocs.values()
def _read_file_revision_interface_configuration_from_disk(working_dir):
    """
    Read the configuration of the file revision interface from disk.

    @param working_dir: the path to the working directory where we can find
                        the configuration file
    @return: the unpickled configuration object
    """
    # 'with' guarantees the file is closed even if cPickle.load raises
    # (the previous open/close pair leaked the handle on error).
    with open(os.path.join(working_dir, 'upload_interface.config'), 'rb') as input_file:
        return cPickle.load(input_file)
def _write_file_revision_interface_configuration_to_disk(working_dir, parameters):
    """
    Write the configuration of the file revision interface to disk.

    @param working_dir: the path to the working directory where we should
                        write the configuration.
    @param parameters: the parameters to write to disk
    """
    # 'with' guarantees the file is closed even if cPickle.dump raises
    # (the previous open/close pair leaked the handle on error).
    with open(os.path.join(working_dir, 'upload_interface.config'), 'wb') as output:
        cPickle.dump(parameters, output)
def log_action(log_dir, action, bibdoc_name, file_path, rename,
               description, comment, doctype, keep_previous_versions,
               file_restriction):
    """
    Logs a new action performed by user on a BibDoc file.

    The log file records one action per line, each column being split
    by '<--->' ('---' is escaped from values 'rename', 'description',
    'comment' and 'bibdoc_name'). The original request for this
    format was motivated by the need to have it easily readable by
    other scripts. Not sure it still makes sense nowadays...

    Newlines are also reserved, and are escaped from the input values
    (necessary for the 'comment' field, which is the only one allowing
    newlines from the browser).

    Each line starts with the time of the action in the following
    format: '2008-06-20 08:02:04 --> '

    @param log_dir: directory where to save the log (ie. working_dir)
    @param action: the performed action (one of 'revise', 'delete',
                   'add', 'addFormat')
    @param bibdoc_name: the name of the bibdoc on which the change is
                        applied
    @param file_path: the path to the file that is going to be
                      integrated as bibdoc, if any (should be ""
                      in case of action="delete", or action="revise"
                      when revising only attributes of a file)
    @param rename: the name used to display the bibdoc, instead of the
                   filename (can be None for no renaming)
    @param description: a description associated with the file
    @param comment: a comment associated with the file
    @param doctype: the category in which the file is going to be
                    integrated
    @param keep_previous_versions: if the previous versions of this
                                   file are to be hidden (0) or not (1)
    @param file_restriction: the restriction applied to the
                             file. Empty string if no restriction
    """
    log_file = os.path.join(log_dir, 'bibdocactions.log')
    # 'with' guarantees the log file is closed even if writing fails:
    # the previous try/except merely re-raised the exception (losing
    # nothing but leaking the file handle).
    with open(log_file, "a+") as file_desc:
        # We must escape new lines from comments in some way
        # (backslashes first, so the newline escape stays unambiguous):
        comment = str(comment).replace('\\', '\\\\').replace('\r\n', '\\n\\r')
        msg = action + '<--->' + \
              bibdoc_name.replace('---', '___') + '<--->' + \
              file_path + '<--->' + \
              str(rename).replace('---', '___') + '<--->' + \
              str(description).replace('---', '___') + '<--->' + \
              comment.replace('---', '___') + '<--->' + \
              doctype + '<--->' + \
              str(int(keep_previous_versions)) + '<--->' + \
              file_restriction + '\n'
        file_desc.write("%s --> %s" % (time.strftime("%Y-%m-%d %H:%M:%S"), msg))
def read_actions_log(log_dir):
    """
    Reads the logs of actions to be performed on files.

    See log_action(..) for more information about the structure of the
    log file.

    @param log_dir: the path to the directory from which to read the
                    log file
    @type log_dir: string
    @return: list of (action, bibdoc_name, file_path, rename,
             description, comment, doctype, keep_previous_versions,
             file_restriction) tuples; empty list if the log is missing
             or unreadable
    """
    actions = []
    log_file = os.path.join(log_dir, 'bibdocactions.log')
    try:
        with open(log_file, "r") as file_desc:
            for line in file_desc:
                (timestamp, action) = line.split(' --> ', 1)
                try:
                    (action, bibdoc_name, file_path, rename, description,
                     comment, doctype, keep_previous_versions,
                     file_restriction) = action.rstrip('\n').split('<--->')
                except ValueError:
                    # Malformed action log: skip this line. (Previously a
                    # bare 'pass' fell through to code using the unbound
                    # variables, and the resulting NameError was swallowed
                    # by the outer handler, silently truncating the list.)
                    continue
                # Clean newline-escaped comment:
                comment = comment.replace('\\n\\r', '\r\n').replace('\\\\', '\\')
                # Perform some checking
                if action not in CFG_ALLOWED_ACTIONS:
                    # Malformed action log: skip unknown actions
                    continue
                try:
                    keep_previous_versions = int(keep_previous_versions)
                except ValueError:
                    # Malformed action log: default to keeping versions
                    keep_previous_versions = 1
                actions.append((action, bibdoc_name, file_path, rename,
                                description, comment, doctype,
                                keep_previous_versions, file_restriction))
    except Exception:
        # Best effort: a missing or unreadable log simply means that no
        # action was recorded yet.
        pass
    return actions
def javascript_display_revise_panel(action, target, show_doctypes, show_keep_previous_versions, show_rename, show_description, show_comment, bibdocname, description, comment, show_restrictions, restriction, doctypes):
    """
    Return a correctly encoded call to the javascript function that
    displays the revision panel, together with the <script> prefix that
    declares its parameters.

    @return: a (javascript_call, javascript_prefix) tuple
    """
    global params_id
    params_id += 1

    # Render Python values in their javascript literal form
    js_bool = lambda flag: flag and 'true' or 'false'
    js_string = lambda value: escape_javascript_string(value, escape_for_html=False)

    substitutions = {'id': params_id,
                     'action': action,
                     'target': js_string(target),
                     'bibdocname': js_string(bibdocname),
                     'description': description and js_string(description) or '',
                     'comment': comment and js_string(comment) or '',
                     'restriction': js_string(restriction),
                     'doctypes': js_string(doctypes),
                     'showDoctypes': js_bool(show_doctypes),
                     'showKeepPreviousVersions': js_bool(show_keep_previous_versions),
                     'showRename': js_bool(show_rename),
                     'showDescription': js_bool(show_description),
                     'showComment': js_bool(show_comment),
                     'showRestrictions': js_bool(show_restrictions)}

    javascript_prefix = '''
    <script type="text/javascript">
    /*<![CDATA[*/
    var revise_panel_params_%(id)i = {"action": "%(action)s",
    "target": "%(target)s",
    "showDoctypes": %(showDoctypes)s,
    "showKeepPreviousVersions": %(showKeepPreviousVersions)s,
    "showRename": %(showRename)s,
    "showDescription": %(showDescription)s,
    "showComment": %(showComment)s,
    "bibdocname": "%(bibdocname)s",
    "description": "%(description)s",
    "comment": "%(comment)s",
    "showRestrictions": %(showRestrictions)s,
    "restriction": "%(restriction)s",
    "doctypes": "%(doctypes)s"}
    /*]]>*/
    </script>''' % substitutions

    javascript_call = 'display_revise_panel(this, revise_panel_params_%(id)i)' % {'id': params_id}
    return (javascript_call, javascript_prefix)
def get_uploaded_files_for_docname(log_dir, docname):
    """
    Given a docname, returns the paths to the files uploaded for this
    revision session.

    @param log_dir: the path to the directory that should contain the
                    uploaded files.
    @param docname: the name of the bibdoc for which we want to
                    retrieve files.
    """
    uploaded_paths = []
    for (action, bibdoc_name, file_path, rename, description,
         comment, doctype, keep_previous_versions,
         file_restriction) in read_actions_log(log_dir):
        if bibdoc_name == docname and os.path.exists(file_path):
            uploaded_paths.append(file_path)
    return uploaded_paths
def get_bibdoc_for_docname(docname, abstract_bibdocs):
    """
    Given a docname, returns the corresponding bibdoc from the
    'abstract' bibdocs.

    Returns None if not found.

    @param docname: the name of the bibdoc we want to retrieve
    @param abstract_bibdocs: the list of bibdocs from which we want to
                             retrieve the bibdoc
    """
    # Return the first (and normally only) bibdoc carrying this docname
    for candidate in abstract_bibdocs:
        if candidate['get_docname'] == docname:
            return candidate
    return None
def get_extensions_for_docname(docname, abstract_bibdocs):
    """
    Returns the list of extensions that exist for the given bibdoc
    name in the given 'abstract' bibdocs.

    @param docname: the name of the bibdoc for which we want to
                    retrieve the available extensions
    @param abstract_bibdocs: the list of bibdocs from which we want to
                             retrieve the bibdoc extensions
    """
    # There should always be at most 1 matching docname, or 0 if it is
    # a new file: stop at the first hit.
    for bibdoc in abstract_bibdocs:
        if bibdoc['get_docname'] == docname:
            return [bibdocfile.get_format()
                    for bibdocfile in bibdoc['list_latest_files']]
    return []
def is_hidden_for_docname(docname, abstract_bibdocs):
    """
    Return True if the bibdoc with given docname in abstract_bibdocs
    should be hidden. Also return True if docname cannot be found in
    abstract_bibdocs.

    @param docname: the name of the bibdoc for which we want to
                    check if it is hidden or not
    @param abstract_bibdocs: the list of bibdocs in which we want to
                             look for the given docname
    """
    for abstract_bibdoc in abstract_bibdocs:
        if abstract_bibdoc['get_docname'] == docname:
            return abstract_bibdoc['hidden_p']
    # Unknown docnames are considered hidden
    return True
def get_description_and_comment(bibdocfiles):
    """
    Return the first description and comment found in the given list
    of bibdocfiles, as tuple (description, comment).

    description and/or comment can be None.

    This function is needed since we do consider that there is one
    comment/description per bibdoc, and not per bibdocfile as APIs
    state.

    @param bibdocfiles: the list of files of a given bibdoc for which
                        we want to extract the description and comment.
    """
    description = None
    comment = None
    # Single pass: keep the first non-empty description and the first
    # non-empty comment, tracked independently.
    for bibdocfile in bibdocfiles:
        if description is None and \
               bibdocfile.get_description() not in ['', None]:
            description = bibdocfile.get_description()
        if comment is None and \
               bibdocfile.get_comment() not in ['', None]:
            comment = bibdocfile.get_comment()
    return (description, comment)
def set_description_and_comment(abstract_bibdocfiles, description, comment):
    """
    Set the description and comment on every given (abstract)
    bibdocfile.

    description and/or comment can be None.

    This function is needed since we do consider that there is one
    comment/description per bibdoc, and not per bibdocfile as APIs
    state.

    @param abstract_bibdocfiles: the list of 'abstract' files of a
                                 given bibdoc for which we want to set
                                 the description and comment.
    @param description: the new description
    @param comment: the new comment
    """
    for abstract_file in abstract_bibdocfiles:
        abstract_file.description = description
        abstract_file.comment = comment
def delete_file(working_dir, file_path):
    """
    Delete the file at the given path.

    In fact the file is not removed: it is moved to
    working_dir/files/trash, with the current timestamp appended to
    its name to avoid collisions.

    @param working_dir: the path to the working directory
    @param file_path: the path to the file to delete
    """
    if not os.path.exists(file_path):
        # Nothing to do
        return
    trashed_name = os.path.split(file_path)[1] + '_' + str(time.time())
    destination = os.path.join(working_dir, 'files', 'trash',
                               trashed_name)
    # os.renames also creates the intermediate trash directories
    os.renames(file_path, destination)
def wash_form_parameters(form, abstract_bibdocs, can_keep_doctypes,
                         keep_default, can_describe_doctypes,
                         can_comment_doctypes, can_rename_doctypes,
                         can_name_new_files, can_restrict_doctypes,
                         doctypes_to_default_filename, working_dir):
    """
    Washes the (user-defined) form parameters, taking into account the
    current state of the files and the admin defaults.

    @param form: the form of the function
    @param abstract_bibdocs: a representation of the current state of
                             the files, as returned by
                             build_updated_file_list(..)
    @param can_keep_doctypes: the list of doctypes for which we allow
                              users to choose to keep or not the
                              previous versions when revising.
    @type can_keep_doctypes: list
    @param keep_default: the admin-defined default for when users
                         cannot choose to keep or not previous version
                         of a revised file
    @type keep_default: boolean
    @param can_describe_doctypes: the list of doctypes for which we
                                  let users define descriptions.
    @type can_describe_doctypes: list
    @param can_comment_doctypes: the list of doctypes for which we let
                                 users define comments.
    @type can_comment_doctypes: list
    @param can_rename_doctypes: the list of doctypes for which we let
                                users rename bibdoc when revising.
    @type can_rename_doctypes: list
    @param can_name_new_files: if we let users choose a name when
                               adding new files.
    @type can_name_new_files: boolean
    @param can_restrict_doctypes: the list of doctypes for which we
                                  let users define access
                                  restrictions.
    @type can_restrict_doctypes: list
    @param doctypes_to_default_filename: mapping from doctype to
                                         admin-chosen name for
                                         uploaded file.
    @type doctypes_to_default_filename: dict
    @param working_dir: the path to the current working directory
    @type working_dir: string
    @return: tuple (file_action, file_target, file_target_doctype,
                    keep_previous_files, file_description, file_comment,
                    file_rename, file_doctype, file_restriction) where::

        file_action: *str* the performed action ('add',
                      'revise','addFormat' or 'delete')

        file_target: *str* the bibdocname of the file on which the
                     action is performed (empty string when
                     file_action=='add')

        file_target_doctype: *str* the doctype of the file we will
                             work on.  Eg: ('main',
                             'additional'). Empty string with
                             file_action=='add'.

        keep_previous_files: *bool* if we keep the previous version of
                             the file or not. Only useful when
                             revising files.

        file_description: *str* the user-defined description to apply
                          to the file.  Empty string when no
                          description defined or when not applicable

        file_comment: *str* the user-defined comment to apply to the
                      file.  Empty string when no comment defined or
                      when not applicable

        file_rename: *str* the new name chosen by user for the
                     bibdoc. Empty string when not defined or when not
                     applicable.

        file_doctype: *str* the user-chosen doctype for the bibdoc
                      when file_action=='add', or the current doctype
                      of the file_target in other cases (doctype must
                      be preserved).

        file_restriction: *str* the user-selected restriction for the
                          file. Empty string if not defined or when
                          not applicable.

        file_name: *str* the original name of the uploaded file. None
                   if no file uploaded

        file_path: *str* the full path to the file

    @rtype: tuple(string, string, string, boolean, string, string,
                  string, string, string, string, string)
    """
    # Action performed ...
    if form.has_key("fileAction") and \
           form['fileAction'] in CFG_ALLOWED_ACTIONS:
        file_action = str(form['fileAction']) # "add", "revise",
                                              # "addFormat" or "delete"
    else:
        file_action = ""

    # ... on file ...
    if form.has_key("fileTarget"):
        file_target = str(form['fileTarget']) # contains bibdocname
        # Also remember its doctype to make sure we do valid actions
        # on it
        corresponding_bibdoc = get_bibdoc_for_docname(file_target,
                                                      abstract_bibdocs)
        if corresponding_bibdoc is not None:
            file_target_doctype = corresponding_bibdoc['get_type']
        else:
            file_target_doctype = ""
    else:
        file_target = ""
        file_target_doctype = ""

    # ... with doctype?
    # Only useful when adding file: otherwise fileTarget doctype is
    # preserved
    file_doctype = file_target_doctype
    if form.has_key("fileDoctype") and \
           file_action == 'add':
        file_doctype = str(form['fileDoctype'])

    # ... keeping previous version? ...
    # NOTE(review): the else-branch below is also taken when
    # file_target_doctype == '' and the checkbox key is absent (e.g.
    # for 'add' actions, where keep_previous_files is not used) --
    # confirm this is intended before refactoring.
    if file_target_doctype != '' and \
           not form.has_key("keepPreviousFiles"):
        # no corresponding key. Two possibilities:
        if file_target_doctype in can_keep_doctypes or \
               '*' in can_keep_doctypes:
            # User decided not to keep
            keep_previous_files = 0
        else:
            # No choice for user. Use default admin has chosen
            keep_previous_files = keep_default
    else:
        # Checkbox seems to be checked ...
        if file_target_doctype in can_keep_doctypes or \
               '*' in can_keep_doctypes:
            # ...and this is allowed
            keep_previous_files = 1
        else:
            # ...but this is not allowed
            keep_previous_files = keep_default

    # ... and description? ...
    # Allowed when revising a describable doctype, adding a file of a
    # describable doctype, or when '*' makes every doctype describable
    if form.has_key("description") and \
           (((file_action == 'revise' and \
              (file_target_doctype in can_describe_doctypes)) or \
             (file_action == 'add' and \
              (file_doctype in can_describe_doctypes))) \
            or '*' in can_describe_doctypes):
        file_description = str(form['description'])
    else:
        file_description = ''

    # ... and comment? ...
    # Same rule as for the description, based on can_comment_doctypes
    if form.has_key("comment") and \
           (((file_action == 'revise' and \
              (file_target_doctype in can_comment_doctypes)) or \
             (file_action == 'add' and \
              (file_doctype in can_comment_doctypes))) \
            or '*' in can_comment_doctypes):
        file_comment = str(form['comment'])
    else:
        file_comment = ''

    # ... and rename to ? ...
    if form.has_key("rename") and \
           ((file_action == "revise" and \
             ((file_target_doctype in can_rename_doctypes) or \
              '*' in can_rename_doctypes)) or \
            (file_action == "add" and \
             can_name_new_files)):
        file_rename = str(form['rename']) # contains new bibdocname if applicable
    elif file_action == "add" and \
             doctypes_to_default_filename.has_key(file_doctype):
        # Admin-chosen name.
        file_rename = doctypes_to_default_filename[file_doctype]
        if file_rename.lower().startswith('file:'):
            # We will define name at a later stage, i.e. when
            # submitting the file with bibdocfile. The name will be
            # chosen by reading content of a file in curdir
            file_rename = ''
        else:
            # Ensure name is unique, by appending a suffix
            file_rename = doctypes_to_default_filename[file_doctype]
            file_counter = 2
            while get_bibdoc_for_docname(file_rename, abstract_bibdocs):
                if file_counter == 2:
                    file_rename += '-2'
                else:
                    # Replace the previous numeric suffix with the
                    # incremented one (e.g. 'name-2' -> 'name-3')
                    file_rename = file_rename[:-len(str(file_counter))] + \
                                  str(file_counter)
                file_counter += 1
    else:
        file_rename = ''

    # ... and file restriction ? ...
    file_restriction = ''
    if form.has_key("fileRestriction"):
        # We cannot clean that value as it could be a restriction
        # declared in another submission. We keep this value.
        file_restriction = str(form['fileRestriction'])

    # ... and the file itself ? ...
    # NOTE(review): if 'myfile' is present but the upload directory
    # cannot be created, or the washed filename is empty, file_name
    # and file_path are never assigned and the return below raises
    # NameError -- confirm whether callers can trigger this.
    if form.has_key('myfile') and \
           hasattr(form['myfile'], "filename") and \
           form['myfile'].filename:
        dir_to_open = os.path.join(working_dir, 'files', 'myfile')
        if not os.path.exists(dir_to_open):
            try:
                os.makedirs(dir_to_open)
            except:
                pass
        # Shall we continue?
        if os.path.exists(dir_to_open):
            form_field = form['myfile']
            file_name = form_field.filename
            form_file = form_field.file
            ## Before saving the file to disk, wash the filename (in particular
            ## washing away UNIX and Windows (e.g. DFS) paths):
            file_name = os.path.basename(file_name.split('\\')[-1])
            file_name = file_name.strip()
            if file_name != "":
                # This may be dangerous if the file size is bigger than
                # the available memory
                file_path = os.path.join(dir_to_open, file_name)
                if not os.path.exists(file_path):
                    # If file already exists, it means that it was
                    # handled by WebSubmit
                    fp = file(file_path, "wb")
                    chunk = form_file.read(10240)
                    while chunk:
                        fp.write(chunk)
                        chunk = form_file.read(10240)
                    fp.close()
                # Record the (washed) name of the last uploaded file
                # for later stages of the submission
                fp = open(os.path.join(working_dir, "lastuploadedfile"), "w")
                fp.write(file_name)
                fp.close()
                fp = open(os.path.join(working_dir, 'myfile'), "w")
                fp.write(file_name)
                fp.close()
    else:
        file_name = None
        file_path = None

    return (file_action, file_target, file_target_doctype,
            keep_previous_files, file_description, file_comment,
            file_rename, file_doctype, file_restriction, file_name,
            file_path)
def move_uploaded_files_to_storage(working_dir, recid, icon_sizes,
                                   create_icon_doctypes,
                                   force_file_revision):
    """
    Apply the modifications on files (add/remove/revise etc.) made by
    users with one of the compatible interfaces (WebSubmit function
    `Create_Upload_Files_Interface.py'; WebSubmit element or WebSubmit
    File management interface using function
    `create_file_upload_interface').

    This function needs a "working directory" (working_dir) that contains a
    bibdocactions.log file with the list of actions to perform.

    @param working_dir: a path to the working directory containing actions to perform and files to attach
    @type working_dir: string
    @param recid: the recid to modify
    @type recid: int
    @param icon_sizes: the sizes of icons to create, as understood by
                       the websubmit icon creation tool
    @type icon_sizes: list(string)
    @param create_icon_doctypes: a list of doctype for which we want
                                 to create icons
    @type create_icon_doctypes: list(string)
    @param force_file_revision: when revising attributes of a file
                                (comment, description) without
                                uploading a new file, force a revision
                                of the current version (so that old
                                comment, description, etc. is kept
                                or not)
    @type force_file_revision: bool
    """
    # We need to remember of some actions that cannot be performed,
    # because files have been deleted or moved after a renaming.
    # Those pending action must be applied when revising the bibdoc
    # with a file that exists (that means that the bibdoc has not been
    # deleted nor renamed by a later action)
    pending_bibdocs = {}
    newly_added_bibdocs = [] # Does not consider new formats/revisions

    performed_actions = read_actions_log(working_dir)
    for action, bibdoc_name, file_path, rename, description, \
            comment, doctype, keep_previous_versions, \
            file_restriction in performed_actions:
        # FIXME: get this out of the loop once changes to bibrecdocs
        # are immediately visible. For the moment, reload the
        # structure from scratch at each step
        bibrecdocs = BibRecDocs(recid)
        if action == 'add':
            new_bibdoc = \
                add(file_path, bibdoc_name, rename, doctype, description,
                    comment, file_restriction, recid, working_dir, icon_sizes,
                    create_icon_doctypes, pending_bibdocs, bibrecdocs)
            if new_bibdoc:
                newly_added_bibdocs.append(new_bibdoc)
        elif action == 'addFormat':
            add_format(file_path, bibdoc_name, recid, doctype, working_dir,
                       icon_sizes, create_icon_doctypes,
                       pending_bibdocs, bibrecdocs)
        elif action == 'revise':
            new_bibdoc = \
                revise(file_path, bibdoc_name, rename, doctype,
                       description, comment, file_restriction, icon_sizes,
                       create_icon_doctypes, keep_previous_versions,
                       recid, working_dir, pending_bibdocs,
                       bibrecdocs, force_file_revision)
            if new_bibdoc:
                newly_added_bibdocs.append(new_bibdoc)
        elif action == 'delete':
            delete(bibdoc_name, recid, working_dir, pending_bibdocs,
                   bibrecdocs)

    # Finally rename bibdocs that should be named according to a file in
    # curdir (eg. naming according to report number). Only consider
    # file that have just been added.
    parameters = _read_file_revision_interface_configuration_from_disk(working_dir)
    new_names = []
    doctypes_to_default_filename = parameters[22]
    for bibdoc_to_rename in newly_added_bibdocs:
        bibdoc_to_rename_doctype = bibdoc_to_rename.doctype
        rename_to = doctypes_to_default_filename.get(bibdoc_to_rename_doctype, '')
        if rename_to.startswith('file:'):
            # This BibDoc must be renamed. Look for name in working dir
            name_at_filepath = os.path.join(working_dir, rename_to[5:])
            if os.path.exists(name_at_filepath) and \
                   os.path.abspath(name_at_filepath).startswith(working_dir):
                # Fix: reset 'rename' before reading.  It previously
                # still held the value from the last iteration of the
                # actions loop above, so a failed read would silently
                # rename the bibdoc to that stale name.
                rename = None
                try:
                    rename = file(name_at_filepath).read()
                except:
                    register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
                                       'could not read file %s in curdir to rename bibdoc' % \
                                       (name_at_filepath,),
                                       alert_admin=True)
                if rename:
                    file_counter = 2
                    new_filename = rename
                    # Append/increment a numeric suffix until the name
                    # is unique among existing and just-renamed bibdocs
                    while bibrecdocs.has_docname_p(new_filename) or (new_filename in new_names):
                        new_filename = rename + '_%i' % file_counter
                        file_counter += 1
                    bibdoc_to_rename.change_name(new_filename)
                    new_names.append(new_filename) # keep track of name, or we have to reload bibrecdoc...
                    _do_log(working_dir, 'Renamed ' + bibdoc_to_rename.get_docname())

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (recid,))

    # Update the MARC
    cli_fix_marc(None, [recid], interactive=False)
def add(file_path, bibdoc_name, rename, doctype, description, comment,
file_restriction, recid, working_dir, icon_sizes, create_icon_doctypes,
pending_bibdocs, bibrecdocs):
"""
Adds the file using bibdocfile CLI
Return the bibdoc that has been newly added.
"""
try:
brd = BibRecDocs(recid)
if os.path.exists(file_path):
# Add file
bibdoc = bibrecdocs.add_new_file(file_path,
doctype,
rename or bibdoc_name,
never_fail=True)
_do_log(working_dir, 'Added ' + brd.get_docname(bibdoc.id) + ': ' + \
file_path)
# Add icon
iconpath = ''
has_added_default_icon_subformat_p = False
for icon_size in icon_sizes:
if doctype in create_icon_doctypes or \
'*' in create_icon_doctypes:
iconpath = _create_icon(file_path, icon_size)
if iconpath is not None:
try:
if not has_added_default_icon_subformat_p:
bibdoc.add_icon(iconpath)
has_added_default_icon_subformat_p = True
else:
icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
bibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
_do_log(working_dir, 'Added icon to ' + \
brd.get_docname(bibdoc.id) + ': ' + iconpath)
except InvenioBibDocFileError, e:
# Most probably icon already existed.
pass
# Add description
if description:
bibdocfiles = bibdoc.list_latest_files()
for bibdocfile in bibdocfiles:
bibdoc.set_description(description,
bibdocfile.get_format())
_do_log(working_dir, 'Described ' + \
brd.get_docname(bibdoc.id) + ': ' + description)
# Add comment
if comment:
bibdocfiles = bibdoc.list_latest_files()
for bibdocfile in bibdocfiles:
bibdoc.set_comment(comment,
bibdocfile.get_format())
_do_log(working_dir, 'Commented ' + \
brd.get_docname(bibdoc.id) + ': ' + comment)
# Set restriction
bibdoc.set_status(file_restriction)
_do_log(working_dir, 'Set restriction of ' + \
brd.get_docname(bibdoc.id) + ': ' + \
file_restriction or '(no restriction)')
return bibdoc
else:
# File has been later renamed or deleted.
# Remember to add it later if file is found (ie
# it was renamed)
pending_bibdocs[bibdoc_name] = (doctype, comment, description, [])
except InvenioBibDocFileError, e:
# Format already existed. How come? We should
# have checked this in Create_Upload_Files_Interface.py
register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
'tried to add already existing file %s ' \
'with name %s to record %i.' % \
(file_path, bibdoc_name, recid),
alert_admin=True)
def add_format(file_path, bibdoc_name, recid, doctype, working_dir,
               icon_sizes, create_icon_doctypes, pending_bibdocs,
               bibrecdocs):
    """
    Adds a new format to a bibdoc using bibdocfile CLI.

    If the file no longer exists on disk (it was renamed or deleted by
    a later action), the format is appended to the matching entry of
    pending_bibdocs instead, to be applied later.

    @param file_path: path to the file to attach as a new format
    @param bibdoc_name: name of the bibdoc to which the format is added
    @param recid: the record to modify
    @param doctype: doctype of the file (used to decide icon creation)
    @param working_dir: the path to the current working directory
    @param icon_sizes: the icon sizes to create
    @param create_icon_doctypes: doctypes for which icons are created
    @param pending_bibdocs: mapping of pending actions (see caller)
    @param bibrecdocs: the BibRecDocs instance to act on
    """
    try:
        brd = BibRecDocs(recid)
        if os.path.exists(file_path):

            # We must retrieve previous description and comment as
            # adding a file using the APIs reset these values
            prev_desc, prev_comment = None, None
            if bibrecdocs.has_docname_p(bibdoc_name):
                (prev_desc, prev_comment) = \
                    get_description_and_comment(bibrecdocs.get_bibdoc(bibdoc_name).list_latest_files())

            # Add file
            bibdoc = bibrecdocs.add_new_format(file_path,
                                               bibdoc_name,
                                               prev_desc,
                                               prev_comment)
            _do_log(working_dir, 'Added new format to ' + \
                    brd.get_docname(bibdoc.id) + ': ' + file_path)

            # Add icons
            has_added_default_icon_subformat_p = False
            for icon_size in icon_sizes:
                iconpath = ''
                if doctype in create_icon_doctypes or \
                       '*' in create_icon_doctypes:
                    iconpath = _create_icon(file_path, icon_size)
                    if iconpath is not None:
                        try:
                            if not has_added_default_icon_subformat_p:
                                # First icon gets the default subformat
                                bibdoc.add_icon(iconpath)
                                has_added_default_icon_subformat_p = True
                            else:
                                # We have already added the "default" icon subformat
                                icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                                bibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                            _do_log(working_dir, 'Added icon to ' + \
                                    brd.get_docname(bibdoc.id) + ': ' + iconpath)
                        except InvenioBibDocFileError, e:
                            # Most probably icon already existed.
                            pass
        else:
            # File has been later renamed or deleted.
            # Remember to add it later if file is found
            if pending_bibdocs.has_key(bibdoc_name):
                pending_bibdocs[bibdoc_name][3].append(file_path)
            # else: we previously added a file by mistake. Do
            # not care, it will be deleted
    except InvenioBibDocFileError, e:
        # Format already existed. How come? We should
        # have checked this in Create_Upload_Files_Interface.py
        register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
                           'tried to add already existing format %s ' \
                           'named %s in record %i.' % \
                           (file_path, bibdoc_name, recid),
                           alert_admin=True)
def revise(file_path, bibdoc_name, rename, doctype, description,
comment, file_restriction, icon_sizes, create_icon_doctypes,
keep_previous_versions, recid, working_dir, pending_bibdocs,
bibrecdocs, force_file_revision):
"""
Revises the given bibdoc with a new file.
Return the bibdoc that has been newly added. (later: if needed,
return as tuple the bibdoc that has been revised, or deleted,
etc.)
"""
added_bibdoc = None
try:
if os.path.exists(file_path) or not file_path:
brd = BibRecDocs(recid)
# Perform pending actions
if pending_bibdocs.has_key(bibdoc_name):
# We have some pending actions to apply before
# going further.
if description == '':
# Last revision did not include a description.
# Use the one of the pending actions
description = pending_bibdocs[bibdoc_name][2]
if comment == '':
# Last revision did not include a comment.
# Use the one of the pending actions
comment = pending_bibdocs[bibdoc_name][1]
original_bibdoc_name = pending_bibdocs[bibdoc_name][0]
if not bibrecdocs.has_docname_p(original_bibdoc_name) and file_path:
# the bibdoc did not originaly exist, so it
# must be added first
bibdoc = bibrecdocs.add_new_file(file_path,
pending_bibdocs[bibdoc_name][0],
bibdoc_name,
never_fail=True)
_do_log(working_dir, 'Added ' + brd.get_docname(bibdoc.id) + ': ' + \
file_path)
added_bibdoc = bibdoc
# Set restriction
bibdoc.set_status(file_restriction)
_do_log(working_dir, 'Set restriction of ' + \
bibrecdocs.get_docname(bibdoc.id) + ': ' + \
file_restriction or '(no restriction)')
# We must retrieve previous description and comment as
# revising a file using the APIs reset these values
prev_desc, prev_comment = None, None
if bibrecdocs.has_docname_p(bibdoc_name):
(prev_desc, prev_comment) = \
get_description_and_comment(bibrecdocs.get_bibdoc(bibdoc_name).list_latest_files())
# Do we have additional formats?
for additional_format in pending_bibdocs[bibdoc_name][3]:
if os.path.exists(additional_format):
bibdoc.add_file_new_format(additional_format,
description=bibdoc.get_description(),
comment=bibdoc.get_comment())
_do_log(working_dir, 'Added new format to' + \
brd.get_docname(bibdoc.id) + ': ' + file_path)
# All pending modification have been applied,
# so delete
del pending_bibdocs[bibdoc_name]
# We must retrieve previous description and comment as
# revising a file using the APIs reset these values
prev_desc, prev_comment = None, None
if bibrecdocs.has_docname_p(bibdoc_name):
(prev_desc, prev_comment) = \
get_description_and_comment(bibrecdocs.get_bibdoc(bibdoc_name).list_latest_files())
if keep_previous_versions and file_path:
# Standard procedure, keep previous version
bibdoc = bibrecdocs.add_new_version(file_path,
bibdoc_name,
prev_desc,
prev_comment)
_do_log(working_dir, 'Revised ' + brd.get_docname(bibdoc.id) + \
' with : ' + file_path)
elif file_path:
# Soft-delete previous versions, and add new file
# (we need to get the doctype before deleting)
if bibrecdocs.has_docname_p(bibdoc_name):
# Delete only if bibdoc originally
# existed
bibrecdocs.delete_bibdoc(bibdoc_name)
_do_log(working_dir, 'Deleted ' + bibdoc_name)
try:
bibdoc = bibrecdocs.add_new_file(file_path,
doctype,
bibdoc_name,
never_fail=True,
description=prev_desc,
comment=prev_comment)
_do_log(working_dir, 'Added ' + brd.get_docname(bibdoc.id) + ': ' + \
file_path)
except InvenioBibDocFileError, e:
_do_log(working_dir, str(e))
register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
'tried to revise a file %s ' \
'named %s in record %i.' % \
(file_path, bibdoc_name, recid),
alert_admin=True)
else:
# User just wanted to change attribute of the file,
# not the file itself
bibdoc = bibrecdocs.get_bibdoc(bibdoc_name)
(prev_desc, prev_comment) = \
get_description_and_comment(bibdoc.list_latest_files())
if prev_desc is None:
prev_desc = ""
if prev_comment is None:
prev_comment = ""
if force_file_revision and \
(description != prev_desc or comment != prev_comment):
# FIXME: If we are going to create a new version,
# then we should honour the keep_previous_versions
# parameter (soft-delete, then add bibdoc, etc)
# But it is a bit complex right now...
# Trick: we revert to current version, which
# creates a revision of the BibDoc
bibdoc.revert(bibdoc.get_latest_version())
bibdoc = bibrecdocs.get_bibdoc(bibdoc_name)
# Rename
if rename and rename != bibdoc_name:
bibrecdocs.change_name(newname=rename, docid=bibdoc.id)
_do_log(working_dir, 'renamed ' + bibdoc_name +' to '+ rename)
# Add icons
if file_path:
has_added_default_icon_subformat_p = False
for icon_size in icon_sizes:
iconpath = ''
if doctype in create_icon_doctypes or \
'*' in create_icon_doctypes:
iconpath = _create_icon(file_path, icon_size)
if iconpath is not None:
try:
if not has_added_default_icon_subformat_p:
bibdoc.add_icon(iconpath)
has_added_default_icon_subformat_p = True
else:
# We have already added the "default" icon subformat
icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
bibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
_do_log(working_dir, 'Added icon to ' + \
brd.get_docname(bibdoc.id) + ': ' + iconpath)
except InvenioBibDocFileError, e:
# Most probably icon already existed.
pass
# Description
if description:
bibdocfiles = bibdoc.list_latest_files()
for bibdocfile in bibdocfiles:
bibdoc.set_description(description,
bibdocfile.get_format())
_do_log(working_dir, 'Described ' + \
brd.get_docname(bibdoc.id) + ': ' + description)
# Comment
if comment:
bibdocfiles = bibdoc.list_latest_files()
for bibdocfile in bibdocfiles:
bibdoc.set_comment(comment,
bibdocfile.get_format())
_do_log(working_dir, 'Commented ' + \
brd.get_docname(bibdoc.id) + ': ' + comment)
# Set restriction
bibdoc.set_status(file_restriction)
_do_log(working_dir, 'Set restriction of ' + \
brd.get_docname(bibdoc.id) + ': ' + \
file_restriction or '(no restriction)')
else:
# File has been later renamed or deleted.
# Remember it
if rename and rename != bibdoc_name:
pending_bibdocs[rename] = pending_bibdocs[bibdoc_name]
except InvenioBibDocFileError, e:
# Format already existed. How come? We should
# have checked this in Create_Upload_Files_Interface.py
register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
'tried to revise a file %s ' \
'named %s in record %i.' % \
(file_path, bibdoc_name, recid),
alert_admin=True)
return added_bibdoc
def delete(bibdoc_name, recid, working_dir, pending_bibdocs,
bibrecdocs):
"""
Deletes the given bibdoc
"""
try:
if bibrecdocs.has_docname_p(bibdoc_name):
bibrecdocs.delete_bibdoc(bibdoc_name)
_do_log(working_dir, 'Deleted ' + bibdoc_name)
if pending_bibdocs.has_key(bibdoc_name):
del pending_bibdocs[bibdoc_name]
except InvenioBibDocFileError, e:
# Mmh most probably we deleted two files at the same
# second. Sleep 1 second and retry... This might go
# away one bibdoc improves its way to delete files
try:
time.sleep(1)
bibrecdocs.delete_bibdoc(bibdoc_name)
_do_log(working_dir, 'Deleted ' + bibdoc_name)
if pending_bibdocs.has_key(bibdoc_name):
del pending_bibdocs[bibdoc_name]
except InvenioBibDocFileError, e:
_do_log(working_dir, str(e))
_do_log(working_dir, repr(bibrecdocs.list_bibdocs()))
register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
'tried to delete a file' \
'named %s in record %i.' % \
(bibdoc_name, recid),
alert_admin=True)
def _do_log(log_dir, msg):
"""
Log what we have done, in case something went wrong.
Nice to compare with bibdocactions.log
Should be removed when the development is over.
@param log_dir: the path to the working directory
@type log_dir: string
@param msg: the message to log
@type msg: string
"""
log_file = os.path.join(log_dir, 'performed_actions.log')
file_desc = open(log_file, "a+")
file_desc.write("%s --> %s\n" %(time.strftime("%Y-%m-%d %H:%M:%S"), msg))
file_desc.close()
def _create_icon(file_path, icon_size, docformat='gif', verbosity=9):
"""
Creates icon of given file.
Returns path to the icon. If creation fails, return None, and
register exception (send email to admin).
@param file_path: full path to icon
@type file_path: string
@param icon_size: the scaling information to be used for the
creation of the new icon.
@type icon_size: int
@param verbosity: the verbosity level under which the program
is to run;
@type verbosity: int
"""
icon_path = None
try:
filename = os.path.splitext(os.path.basename(file_path))[0]
(icon_dir, icon_name) = create_icon(
{'input-file':file_path,
'icon-name': "icon-%s" % filename,
'multipage-icon': False,
'multipage-icon-delay': 0,
'icon-scale': icon_size,
'icon-file-format': format,
'verbosity': verbosity})
icon_path = icon_dir + os.sep + icon_name
except InvenioWebSubmitIconCreatorError, e:
register_exception(prefix='Icon for file %s could not be created: %s' % \
(file_path, str(e)),
alert_admin=False)
return icon_path
def get_upload_file_interface_javascript(form_url_params):
"""
Returns the Javascript code necessary to run the upload file
interface.
"""
javascript = '''
<script type="text/javascript" src="/js/jquery.form.js"></script>
<script type="text/javascript">
<!--
'''
if form_url_params:
javascript += '''
// prepare the form when the DOM is ready
$(document).ready(function() {
var progress = $('.progress');
var rotatingprogress = $('.rotatingprogress');
var bar = $('.bar');
var percent = $('.percent');
var options = {
target: '#uploadFileInterface', // target element(s) to be updated with server response
uploadProgress: function(event, position, total, percentComplete) {
update_progress(progress, bar, percent, percentComplete, rotatingprogress);},
beforeSubmit: function(arr, $form, options) {
show_upload_progress();
return true;},
success: showResponse, // post-submit callback
url: '/%(CFG_SITE_RECORD)s/managedocfilesasync%(form_url_params)s' // override for form's 'action' attribute
};
// bind form using 'ajaxForm'
var this_form = $('form:has(#balloonReviseFileInput)')
$('#bibdocfilemanagedocfileuploadbutton').click(function() {
this_form.bibdocfilemanagedocfileuploadbuttonpressed=true;
this_form.ajaxSubmit(options);
})
});
// post-submit callback
function showResponse(responseText, statusText) {
hide_upload_progress();
hide_revise_panel();
}
''' % {
'form_url_params': form_url_params,
'CFG_SITE_RECORD': CFG_SITE_RECORD}
javascript += '''
/* Record position of the last clicked link that triggered the display
* of the revise panel
*/
var last_clicked_link = null;
function display_revise_panel(link, params){
var action = params['action'];
var target = params['target'];
var showDoctypes = params['showDoctypes'];
var showKeepPreviousVersions = params['showKeepPreviousVersions'];
var showRename = params['showRename'];
var showDescription = params['showDescription'];
var showComment = params['showComment'];
var bibdocname = params['bibdocname'];
var description = params['description'];
var comment = params['comment'];
var showRestrictions = params['showRestrictions'];
var restriction = params['restriction'];
var doctypes = params['doctypes'];
var balloon = document.getElementById("balloon");
var file_input_block = document.getElementById("balloonReviseFileInputBlock");
var doctype = document.getElementById("fileDoctypesRow");
var warningFormats = document.getElementById("warningFormats");
var keepPreviousVersions = document.getElementById("keepPreviousVersions");
var renameBox = document.getElementById("renameBox");
var descriptionBox = document.getElementById("descriptionBox");
var commentBox = document.getElementById("commentBox");
var restrictionBox = document.getElementById("restrictionBox");
var apply_button = document.getElementById("applyChanges");
var mainForm = getMainForm();
last_clicked_link = link;
var pos;
/* Show/hide parts of the form */
if (showDoctypes) {
doctype.style.display = ''
} else {
doctype.style.display = 'none'
}
if (action == 'revise' && showKeepPreviousVersions == true){
warningFormats.style.display = ''
} else {
warningFormats.style.display = 'none'
}
if ((action == 'revise' || action == 'add') && showRename == true){
renameBox.style.display = ''
} else {
renameBox.style.display = 'none'
}
if ((action == 'revise' || action == 'add') && showDescription == true){
descriptionBox.style.display = ''
} else {
descriptionBox.style.display = 'none'
}
if ((action == 'revise' || action == 'add') && showComment == true){
commentBox.style.display = ''
} else {
commentBox.style.display = 'none'
}
if ((action == 'revise' || action == 'add') && showRestrictions == true){
restrictionBox.style.display = ''
} else {
restrictionBox.style.display = 'none'
}
if (action == 'revise' && showKeepPreviousVersions == true) {
keepPreviousVersions.style.display = ''
} else {
keepPreviousVersions.style.display = 'none'
}
if (action == 'add') {
updateForm();
}
/* Reset values */
file_input_block.innerHTML = file_input_block.innerHTML; // Trick to reset input field
doctype.innerHTML = doctypes;
mainForm.balloonReviseFileKeep.checked = true;
mainForm.rename.value = bibdocname;
mainForm.comment.value = comment;
mainForm.description.value = description;
var fileRestrictionFound = false;
for (var i=0; i < mainForm.fileRestriction.length; i++) {
if (mainForm.fileRestriction[i].value == restriction) {
mainForm.fileRestriction.selectedIndex = i;
fileRestrictionFound = true;
}
}
if (!fileRestrictionFound) {
var restrictionItem = new Option(restriction, restriction);
mainForm.fileRestriction.appendChild(restrictionItem);
var lastIndex = mainForm.fileRestriction.length - 1;
mainForm.fileRestriction.selectedIndex = lastIndex;
}
/* Display and move to correct position*/
pos = findPosition(link)
balloon.style.display = '';
balloon.style.position="absolute";
balloon.style.left = pos[0] + link.offsetWidth +"px";
balloon.style.top = pos[1] - Math.round(balloon.offsetHeight/2) + 5 + "px";
balloon.style.zIndex = 1001;
balloon.style.display = '';
/* Set the correct action and target file*/
mainForm.fileAction.value = action;
mainForm.fileTarget.value = target;
/* Disable other controls */
if (apply_button) {
apply_button.disabled = true;
}
/*gray_out(true);*/
}
function hide_revise_panel(){
var balloon = document.getElementById("balloon");
var apply_button = document.getElementById("applyChanges");
balloon.style.display = 'none';
if (apply_button) {
apply_button.disabled = false;
}
/*gray_out(false);*/
}
/* Intercept ESC key in order to close revise panel*/
document.onkeyup = keycheck;
function keycheck(e){
var KeyID = (window.event) ? event.keyCode : e.keyCode;
var upload_in_progress_p = $('.progress').is(":visible") || $('.rotatingprogress').is(":visible")
if(KeyID==27){
if (upload_in_progress_p) {
hide_upload_progress();
} else {
hide_revise_panel();
}
}
}
/* Update progress bar, show if necessary (and then hide rotating progress indicator) */
function update_progress(progress, bar, percent, percentComplete, rotatingprogress){
if (rotatingprogress.is(":visible")) {
$('.rotatingprogress').hide();
$('.progress').show();
}
var percentVal = percentComplete + '%%';
bar.width(percentVal)
percent.html(percentVal);
if (percentComplete == '100') {
// There might be some lengthy post-processing to do.
show_upload_progress(post_process_label=true);
}
}
/* Hide upload/cancel button, show rotating progress indicator */
function show_upload_progress(post_process_label_p) {
if (!post_process_label_p) { post_process_label_p = false;}
if (post_process_label_p) {
/* Show post-process label */
$('.progress').hide();
$('.rotatingprogress').hide();
$('.rotatingpostprocess').show();
} else {
/* Show uploading label */
$('#canceluploadbuttongroup').hide();
$('.rotatingprogress').show();
}
}
/* show upload/cancel button, hide any progress indicator */
function hide_upload_progress() {
$('.progress').hide();
$('.rotatingprogress').hide();
$('.rotatingpostprocess').hide();
$('#canceluploadbuttongroup').show();
$('.percent').html('0%%');
}
function findPosition( oElement ) {
/*Return the x,y position on page of the given object*/
if( typeof( oElement.offsetParent ) != 'undefined' ) {
for( var posX = 0, posY = 0; oElement; oElement = oElement.offsetParent ) {
posX += oElement.offsetLeft;
posY += oElement.offsetTop;
}
return [ posX, posY ];
} else {
return [ oElement.x, oElement.y ];
}
}
function getMainForm()
{
return $('form:has(#balloonReviseFileInput)')[0];
}
function nextStep()
{
if(confirm("You are about to submit the files and end the upload process."))
{
var mainForm = getMainForm();
mainForm.step.value = 2;
user_must_confirm_before_leaving_page = false;
mainForm.submit();
}
return true;
}
function updateForm(doctype, can_describe_doctypes, can_comment_doctypes, can_restrict_doctypes) {
/* Update the revision panel to hide or not part of the interface
* based on selected doctype
*
* Note: we use a small trick here to use the javascript 'in' operator, which
* does not work for arrays, but for object => we transform our arrays into
* objects literal
*/
/* Get the elements we are going to affect */
var renameBox = document.getElementById("renameBox");
var descriptionBox = document.getElementById("descriptionBox");
var commentBox = document.getElementById("commentBox");
var restrictionBox = document.getElementById("restrictionBox");
if (!can_describe_doctypes) {var can_describe_doctypes = [];}
if (!can_comment_doctypes) {var can_comment_doctypes = [];}
if (!can_restrict_doctypes) {var can_restrict_doctypes = [];}
if ((doctype in can_describe_doctypes) ||
('*' in can_describe_doctypes)){
descriptionBox.style.display = ''
} else {
descriptionBox.style.display = 'none'
}
if ((doctype in can_comment_doctypes) ||
('*' in can_comment_doctypes)){
commentBox.style.display = ''
} else {
commentBox.style.display = 'none'
}
if ((doctype in can_restrict_doctypes) ||
('*' in can_restrict_doctypes)){
restrictionBox.style.display = ''
} else {
restrictionBox.style.display = 'none'
}
/* Move the revise panel accordingly */
var balloon = document.getElementById("balloon");
pos = findPosition(last_clicked_link)
balloon.style.display = '';
balloon.style.position="absolute";
balloon.style.left = pos[0] + last_clicked_link.offsetWidth +"px";
balloon.style.top = pos[1] - Math.round(balloon.offsetHeight/2) + 5 + "px";
balloon.style.zIndex = 1001;
balloon.style.display = '';
}
function askDelete(bibdocname, form_url_params){
/*
Ask user if she wants to delete file
*/
if (confirm('Are you sure you want to delete '+bibdocname+'?'))
{
if (form_url_params) {
var mainForm = getMainForm();
mainForm.fileTarget.value = bibdocname;
mainForm.fileAction.value='delete';
user_must_confirm_before_leaving_page = false;
var options = {
target: '#uploadFileInterface',
success: showResponse,
url: '/%(CFG_SITE_RECORD)s/managedocfilesasync' + form_url_params
};
$(mainForm).ajaxSubmit(options);
} else {
/*WebSubmit function*/
document.forms[0].fileTarget.value = bibdocname;
document.forms[0].fileAction.value='delete';
user_must_confirm_before_leaving_page = false;
document.forms[0].submit();
}
}
return false;
}
function gray_out(visible) {
/* Gray out the screen so that user cannot click anywhere else.
Based on <http://www.hunlock.com/blogs/Snippets:_Howto_Grey-Out_The_Screen>
*/
var modalShield = document.getElementById('modalShield');
if (!modalShield) {
var tbody = document.getElementsByTagName("body")[0];
var tnode = document.createElement('div');
tnode.style.position = 'absolute';
tnode.style.top = '0px';
tnode.style.left = '0px';
tnode.style.overflow = 'hidden';
tnode.style.display = 'none';
tnode.id = 'modalShield';
tbody.appendChild(tnode);
modalShield = document.getElementById('modalShield');
}
if (visible){
// Calculate the page width and height
var pageWidth = '100%%';
var pageHeight = '100%%';
//set the shader to cover the entire page and make it visible.
modalShield.style.opacity = 0.7;
modalShield.style.MozOpacity = 0.7;
modalShield.style.filter = 'alpha(opacity=70)';
modalShield.style.zIndex = 1000;
modalShield.style.backgroundColor = '#000000';
modalShield.style.width = pageWidth;
modalShield.style.height = pageHeight;
modalShield.style.display = 'block';
} else {
modalShield.style.display = 'none';
}
}
-->
</script>
''' % {'CFG_SITE_RECORD': CFG_SITE_RECORD}
return javascript
def get_upload_file_interface_css():
"""
Returns the CSS to embed in the page for the upload file interface.

The stylesheet is returned as a string wrapped in a <style> element.
Only %(CFG_SITE_URL)s is interpolated (to build absolute URLs for the
balloon background images); literal percent signs in the CSS are
escaped as '%%' because the template goes through the '%' operator.

@return: a <style type="text/css"> HTML snippet ready for embedding.
@rtype: string
"""
# The CSS embedded in the page for the revise panel
css = '''
<style type="text/css">
<!--
#reviseControl{
overflow:auto;
width: 600px;
padding:1px;
}
.reviseControlBrowser{
padding:5px;
background-color:#fff;
border-collapse:collapse;
border-spacing: 0px;
border: 1px solid #999;
}
.reviseControlFileColumn {
padding-right:60px;
padding-left:5px;
text-align: left;
color:#00f;
}
.reviseControlActionColumn,
.reviseControlFormatColumn{
font-size:small;
}
.reviseControlActionColumn,
.reviseControlActionColumn a,
.reviseControlActionColumn a:link,
.reviseControlActionColumn a:hover
.reviseControlActionColumn a:visited{
font-size:small;
color: #060;
text-align:right;
}
.reviseControlFormatColumn,
.reviseControlFormatColumn a,
.reviseControlFormatColumn a:link,
.reviseControlFormatColumn a:hover
.reviseControlFormatColumn a:visited{
font-size:small;
color: #555;
text-align:left;
}
.optional{
color: #555;
font-size:0.9em;
font-weight:normal
}
.even{
background-color:#ecf3fe;
}
/*
.buttonLikeLink, .buttonLikeLink:visited, .buttonLikeLink:hover{
background-color:#fff;
border:2px outset #555;
color:#000;
padding: 2px 5px;
display:inline-block;
margin:2px;
text-decoration:none;
font-size:small;
cursor: default
}
*/
#balloon table{
border-collapse:collapse;
border-spacing: 0px;
}
#balloon table td.topleft{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_top_left_shadow.png) no-repeat bottom right;
}
#balloon table td.bottomleft{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_bottom_left_shadow.png) no-repeat top right;
}
#balloon table td.topright{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_top_right_shadow.png) no-repeat bottom left;
}
#balloon table td.bottomright{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_bottom_right_shadow.png) no-repeat top left;
}
#balloon table td.top{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_top_shadow.png) repeat-x bottom left;
}
#balloon table td.bottom{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_bottom_shadow.png) repeat-x top left;
}
#balloon table td.left{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_left_shadow.png) repeat-y top right;
text-align:right;
padding:0;
}
#balloon table td.right{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_right_shadow.png) repeat-y top left;
}
#balloon table td.arrowleft{
background: transparent url(%(CFG_SITE_URL)s/img/balloon_arrow_left_shadow.png) no-repeat bottom right;
width:24px;
height:27px;
}
#balloon table td.center{
background-color:#ffffea;
}
#balloon label{
font-size:small;
}
#balloonReviseFile{
width:220px;
text-align:left;
}
#warningFormats{
color:#432e11;
font-size:x-small;
text-align:center;
margin: 4px auto 4px auto;
}
#fileDoctype {
margin-bottom:3px;
}
#renameBox, #descriptionBox, #commentBox, #keepPreviousVersions{
margin-top:6px;
}
#description, #comment, #rename {
width:90%%;
}
.rotatingprogress, .rotatingpostprocess {
position:relative;
float:right;
padding: 1px;
font-style:italic;
font-size:small;
margin-right: 5px;
display:none;
}
.progress {
position:relative;
width:100%%;
float:left;
border: 1px solid #ddd;
padding: 1px;
border-radius: 3px;
display:none;
}
.bar {
background-color: #dd9700;
width:0%%; height:20px;
border-radius: 3px; }
.percent {
position:absolute;
display:inline-block;
top:3px;
left:45%%;
font-size:small;
color: #514100;
}
-->
</style>
''' % {'CFG_SITE_URL': CFG_SITE_URL}
return css
# The HTML markup of the revise panel.
# This template is %-interpolated by the caller with the keys:
# file_label, filename_label, description_label, comment_label,
# restrictions, previous_versions_label, previous_versions_help,
# revise_format_warning, revise_format_help, uploading_label,
# postprocess_label, cancel, submit_or_button, upload, CFG_SITE_URL.
# Literal percent signs are therefore escaped as '%%'.
revise_balloon = '''
<div id="balloon" style="display:none;">
<input type="hidden" name="fileAction" value="" />
<input type="hidden" name="fileTarget" value="" />
<table>
<tr>
<td class="topleft">&nbsp;</td>
<td class="top">&nbsp;</td>
<td class="topright">&nbsp;</td>
</tr>
<tr>
<td class="left" vertical-align="center" width="24"><img alt=" " src="../img/balloon_arrow_left_shadow.png" /></td>
<td class="center">
<table id="balloonReviseFile">
<tr>
<td><label for="balloonReviseFileInput">%(file_label)s:</label><br/>
<div style="display:none" id="fileDoctypesRow"></div>
<div id="balloonReviseFileInputBlock"><input type="file" name="myfile" id="balloonReviseFileInput" size="20" /></div>
<!-- <input type="file" name="myfile" id="balloonReviseFileInput" size="20" onchange="var name=getElementById('rename');var filename=this.value.split('/').pop().split('.')[0];name.value=filename;"/> -->
<div id="renameBox" style=""><label for="rename">%(filename_label)s:</label><br/><input type="text" name="rename" id="rename" size="20" autocomplete="off"/></div>
<div id="descriptionBox" style=""><label for="description">%(description_label)s:</label><br/><input type="text" name="description" id="description" size="20" autocomplete="off"/></div>
<div id="commentBox" style=""><label for="comment">%(comment_label)s:</label><br/><textarea name="comment" id="comment" rows="3"/></textarea></div>
<div id="restrictionBox" style="display:none;white-space:nowrap;">%(restrictions)s</div>
<div id="keepPreviousVersions" style="display:none"><input type="checkbox" id="balloonReviseFileKeep" name="keepPreviousFiles" checked="checked" /><label for="balloonReviseFileKeep">%(previous_versions_label)s</label>&nbsp;<small>[<a href="" onclick="alert('%(previous_versions_help)s');return false;">?</a>]</small></div>
<p id="warningFormats" style="display:none"><img src="%(CFG_SITE_URL)s/img/warning.png" alt="Warning"/> %(revise_format_warning)s&nbsp;[<a href="" onclick="alert('%(revise_format_help)s');return false;">?</a>]</p>
<div class="progress"><div class="bar"></div ><div class="percent">0%%</div ></div>
<div class="rotatingprogress"><img src="/img/ui-anim_basic_16x16.gif" /> %(uploading_label)s</div><div class="rotatingpostprocess"><img src="/img/ui-anim_basic_16x16.gif" /> %(postprocess_label)s</div><div id="canceluploadbuttongroup" style="text-align:right;margin-top:5px"><input type="button" value="%(cancel)s" onclick="javascript:hide_revise_panel();"/> <input type="%(submit_or_button)s" id="bibdocfilemanagedocfileuploadbutton" onclick="show_upload_progress()" value="%(upload)s"/></div>
</td>
</tr>
</table>
</td>
<td class="right">&nbsp;</td>
</tr>
<tr>
<td class="bottomleft">&nbsp;</td>
<td class="bottom">&nbsp;</td>
<td class="bottomright">&nbsp;</td>
</tr>
</table>
</div>
'''
diff --git a/invenio/legacy/bibdocfile/plugins/bom_textdoc.py b/invenio/legacy/bibdocfile/plugins/bom_textdoc.py
index c7de69ec9..828674da9 100644
--- a/invenio/legacy/bibdocfile/plugins/bom_textdoc.py
+++ b/invenio/legacy/bibdocfile/plugins/bom_textdoc.py
@@ -1,142 +1,142 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibObject Module providing BibObject prividing features for documents containing text (not necessarily as the main part of the content)"""
from invenio.bibdocfile import BibDoc, InvenioBibDocFileError
from invenio.dbquery import run_sql
from datetime import datetime
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
import os
class BibTextDoc(BibDoc):
    """BibDoc specialisation for documents that contain text (not
    necessarily as the main part of the content).

    The text extracted for version N is cached in the bibdoc directory
    as the file ``.text;N`` and reused by L{get_text} / L{has_text}.
    """

    def get_text(self, version=None):
        """
        @param version: the requested version. If not set, the latest version
            will be used.
        @type version: integer
        @return: the textual content corresponding to the specified version
            of the document, or the empty string if none was extracted.
        @rtype: string
        """
        if version is None:
            version = self.get_latest_version()
        # BUGFIX: has_text()'s first positional parameter is
        # require_up_to_date, not version, so the version must be passed
        # by keyword; the previous positional call silently checked the
        # latest version with require_up_to_date=version instead.
        if self.has_text(version=version):
            # Context manager avoids leaking the file handle.
            with open(os.path.join(self.basedir, '.text;%i' % version)) as text_file:
                return text_file.read()
        return ""

    def get_text_path(self, version=None):
        """
        @param version: the requested version. If not set, the latest version
            will be used.
        @type version: int
        @return: the full path to the textual content corresponding to the
            specified version of the document, or the empty string if none
            was extracted.
        @rtype: string
        """
        if version is None:
            version = self.get_latest_version()
        # BUGFIX: pass version by keyword (see get_text above).
        if self.has_text(version=version):
            return os.path.join(self.basedir, '.text;%i' % version)
        return ""

    def extract_text(self, version=None, perform_ocr=False, ln='en'):
        """
        Try what is necessary to extract the textual information of a document.

        @param version: the version of the document for which text is required.
            If not specified the text will be retrieved from the last version.
        @type version: integer
        @param perform_ocr: whether to perform OCR.
        @type perform_ocr: bool
        @param ln: a two letter language code to give as a hint to the OCR
            procedure.
        @type ln: string
        @raise InvenioBibDocFileError: in case of error.
        @note: the text is extracted and cached for later use. Use L{get_text}
            to retrieve it.
        """
        # Function-level import: keeps the converter dependency lazy.
        from invenio.websubmit_file_converter import get_best_format_to_extract_text_from, convert_file, InvenioWebSubmitFileConverterError
        if version is None:
            version = self.get_latest_version()
        docfiles = self.list_version_files(version)
        ## We try to extract text only from original or OCRed documents.
        filenames = [docfile.get_full_path() for docfile in docfiles if 'CONVERTED' not in docfile.flags or 'OCRED' in docfile.flags]
        try:
            filename = get_best_format_to_extract_text_from(filenames)
        except InvenioWebSubmitFileConverterError:
            ## We fall back on considering all the documents
            filenames = [docfile.get_full_path() for docfile in docfiles]
            try:
                filename = get_best_format_to_extract_text_from(filenames)
            except InvenioWebSubmitFileConverterError:
                # Nothing extractable at all: cache an empty text file so
                # that has_text()/get_text() behave consistently later.
                with open(os.path.join(self.basedir, '.text;%i' % version), 'w') as text_file:
                    text_file.write('')
                return
        try:
            convert_file(filename, os.path.join(self.basedir, '.text;%i' % version), '.txt', perform_ocr=perform_ocr, ln=ln)
            if version == self.get_latest_version():
                run_sql("UPDATE bibdoc SET text_extraction_date=NOW() WHERE id=%s", (self.id, ))
        except InvenioWebSubmitFileConverterError as e:
            # 'except ... as e' and calling the exception class are valid on
            # both Python 2.6+ and Python 3, unlike the old comma forms.
            register_exception(alert_admin=True, prefix="Error in extracting text from bibdoc %i, version %i" % (self.id, version))
            raise InvenioBibDocFileError(str(e))

    def pdf_a_p(self):
        """
        @return: True if this document contains a PDF in PDF/A format.
        @rtype: bool"""
        return self.has_flag('PDF/A', 'pdf')

    def has_text(self, require_up_to_date=False, version=None):
        """
        Return True if the text of this document has already been extracted.

        @param require_up_to_date: if True check the text was actually
            extracted after the most recent format of the given version.
        @type require_up_to_date: bool
        @param version: a version for which the text should have been
            extracted. If not specified the latest version is considered.
        @type version: integer
        @return: True if the text has already been extracted.
        @rtype: bool
        """
        if version is None:
            version = self.get_latest_version()
        if not os.path.exists(os.path.join(self.basedir, '.text;%i' % version)):
            return False
        if not require_up_to_date:
            return True
        # Up-to-date check: the cached text must be newer than every file
        # of the considered version.
        docfiles = self.list_version_files(version)
        # NOTE(review): docfile.md is presumably the file's modification
        # datetime -- confirm against the BibDocFile definition.
        text_md = datetime.fromtimestamp(os.path.getmtime(os.path.join(self.basedir, '.text;%i' % version)))
        for docfile in docfiles:
            if text_md <= docfile.md:
                return False
        return True

    def __repr__(self):
        return 'BibTextDoc(%s, %s, %s)' % (repr(self.id), repr(self.doctype), repr(self.human_readable))
def supports(doctype, extensions):
    """Tell whether this plugin handles the given document.

    A document is supported when its doctype is "Fulltext", or when at
    least one of its extensions starts with ".pdf" or ".ps".
    """
    if doctype == "Fulltext":
        return True
    return any(extension.startswith((".pdf", ".ps")) for extension in extensions)
# Plugin factory entry point: build a BibTextDoc for the given docid.
def create_instance(docid=None, doctype='Main', human_readable=False, # pylint: disable=W0613
initial_data = None):
# NOTE: 'doctype' is accepted for plugin-API compatibility but is not
# forwarded to the constructor (hence the pylint W0613 suppression).
return BibTextDoc(docid=docid, human_readable=human_readable,
initial_data = initial_data)
diff --git a/invenio/legacy/bibdocfile/webinterface.py b/invenio/legacy/bibdocfile/webinterface.py
index 8bfde140b..4495a3be2 100644
--- a/invenio/legacy/bibdocfile/webinterface.py
+++ b/invenio/legacy/bibdocfile/webinterface.py
@@ -1,540 +1,540 @@
## This file is part of Invenio.
## Copyright (C) 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import cgi
import os
import time
import shutil
from invenio.config import \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_SITE_LANG, \
CFG_TMPSHAREDDIR, \
CFG_SITE_URL, \
CFG_SITE_SECURE_URL, \
CFG_WEBSUBMIT_STORAGEDIR, \
CFG_SITE_RECORD
from invenio.bibdocfile_config import CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES, \
CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC, \
CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS, \
CFG_BIBDOCFILE_ICON_SUBFORMAT_RE
from invenio.utils import apache
from invenio.access_control_config import VIEWRESTRCOLL
from invenio.access_control_mailcookie import mail_cookie_create_authorize_action
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_admin import acc_is_role
from invenio.webpage import page, pageheaderonly, \
pagefooteronly, warning_page, write_warning
from invenio.webuser import getUid, page_not_authorized, collect_user_info, isUserSuperAdmin, \
isGuestUser
from invenio import webjournal_utils
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.url import make_canonical_urlargd, redirect_to_url
from invenio.base.i18n import gettext_set_language
from invenio.search_engine import \
guess_primary_collection_of_a_record, get_colID, record_exists, \
create_navtrail_links, check_user_can_view_record, record_empty, \
is_user_owner_of_record
from invenio.bibdocfile import BibRecDocs, normalize_format, file_strip_ext, \
stream_restricted_icon, BibDoc, InvenioBibDocFileError, \
get_subformat_from_format
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.websearchadminlib import get_detailed_page_tabs
import invenio.template
bibdocfile_templates = invenio.template.load('bibdocfile')
webstyle_templates = invenio.template.load('webstyle')
websubmit_templates = invenio.template.load('websubmit')
websearch_templates = invenio.template.load('websearch')
from invenio.bibdocfile_managedocfiles import \
create_file_upload_interface, \
get_upload_file_interface_javascript, \
get_upload_file_interface_css, \
move_uploaded_files_to_storage
bibdocfile_templates = invenio.template.load('bibdocfile')
# Handler class for /<CFG_SITE_RECORD>/<recid>/files/* URLs: streams one
# attached file of a record, or renders the list of all of its files.
# NOTE: legacy Python 2 code ('except X, msg', dict.iteritems(), cmp-based
# sort) -- keep that in mind when porting.
class WebInterfaceFilesPages(WebInterfaceDirectory):
def __init__(self, recid):
# Identifier of the record whose files are exposed by this handler.
self.recid = recid
def _lookup(self, component, path):
# after /<CFG_SITE_RECORD>/<recid>/files/ every part is used as the file
# name
filename = component
# Request handler bound to 'filename'; returned to the URL dispatcher.
def getfile(req, form):
args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
ln = args['ln']
_ = gettext_set_language(ln)
uid = getUid(req)
user_info = collect_user_info(req)
verbose = args['verbose']
if verbose >= 1 and not isUserSuperAdmin(user_info):
# Only SuperUser can see all the details!
verbose = 0
# Site-wide access level and record-existence guards.
if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid),
navmenuid='submit')
if record_exists(self.recid) < 1:
msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
return warning_page(msg, req, ln)
if record_empty(self.recid):
msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
return warning_page(msg, req, ln)
(auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
# Restricted record: guests are redirected to login, other users get an
# error page -- unless the record is in a released journal issue, in
# which case the file is served anyway.
if auth_code and user_info['email'] == 'guest':
if webjournal_utils.is_recid_in_released_issue(self.recid):
# We can serve the file
pass
else:
cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
CFG_SITE_SECURE_URL + user_info['uri']}, {})
return redirect_to_url(req, target, norobot=True)
elif auth_code:
if webjournal_utils.is_recid_in_released_issue(self.recid):
# We can serve the file
pass
else:
return page_not_authorized(req, "../", \
text = auth_message)
readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1
# From now on: either the user provided a specific file
# name (and a possible version), or we return a list of
# all the available files. In no case are the docids
# visible.
try:
bibarchive = BibRecDocs(self.recid)
except InvenioBibDocFileError:
register_exception(req=req, alert_admin=True)
msg = "<p>%s</p><p>%s</p>" % (
_("The system has encountered an error in retrieving the list of files for this document."),
_("The error has been logged and will be taken in consideration as soon as possible."))
return warning_page(msg, req, ln)
if bibarchive.deleted_p():
req.status = apache.HTTP_GONE
return warning_page(_("Requested record does not seem to exist."), req, ln)
docname = ''
docformat = ''
version = ''
warn = ''
if filename:
# We know the complete file name, guess which docid it
# refers to
## TODO: Change the extension system according to ext.py from setlink
## and have a uniform extension mechanism...
docname = file_strip_ext(filename)
docformat = filename[len(docname):]
if docformat and docformat[0] != '.':
docformat = '.' + docformat
if args['subformat']:
docformat += ';%s' % args['subformat']
else:
docname = args['docname']
if not docformat:
docformat = args['format']
if args['subformat']:
docformat += ';%s' % args['subformat']
if not version:
version = args['version']
## Download as attachment
is_download = False
if args['download']:
is_download = True
# version could be either empty, or all or an integer
try:
int(version)
except ValueError:
if version != 'all':
version = ''
display_hidden = isUserSuperAdmin(user_info)
if version != 'all':
# search this filename in the complete list of files
for doc in bibarchive.list_bibdocs():
if docname == bibarchive.get_docname(doc.id):
try:
try:
docfile = doc.get_file(docformat, version)
except InvenioBibDocFileError, msg:
req.status = apache.HTTP_NOT_FOUND
if req.headers_in.get('referer'):
## There must be a broken link somewhere.
## Maybe it's good to alert the admin
register_exception(req=req, alert_admin=True)
warn += write_warning(_("The format %s does not exist for the given version: %s") % (cgi.escape(docformat), cgi.escape(str(msg))))
break
(auth_code, auth_message) = docfile.is_restricted(user_info)
# Restricted file: stream the restricted icon, send guests to the
# login page, or flag the restriction for other users.
if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid):
if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(docformat)):
return stream_restricted_icon(req)
if user_info['email'] == 'guest':
cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()})
target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
CFG_SITE_SECURE_URL + user_info['uri']}, {})
redirect_to_url(req, target)
else:
req.status = apache.HTTP_UNAUTHORIZED
warn += write_warning(_("This file is restricted: ") + str(auth_message))
break
if not docfile.hidden_p():
if not readonly:
# Register the download before streaming the file.
ip = str(req.remote_ip)
doc.register_download(ip, docfile.get_version(), docformat, uid, self.recid)
try:
return docfile.stream(req, download=is_download)
except InvenioBibDocFileError, msg:
register_exception(req=req, alert_admin=True)
req.status = apache.HTTP_INTERNAL_SERVER_ERROR
warn += write_warning(_("An error has happened in trying to stream the request file."))
else:
req.status = apache.HTTP_UNAUTHORIZED
warn += write_warning(_("The requested file is hidden and can not be accessed."))
except InvenioBibDocFileError, msg:
register_exception(req=req, alert_admin=True)
if docname and docformat and not warn:
req.status = apache.HTTP_NOT_FOUND
warn += write_warning(_("Requested file does not seem to exist."))
# filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
filelist = bibdocfile_templates.tmpl_display_bibrecdocs(bibarchive, "", version, ln=ln, verbose=verbose, display_hidden=display_hidden)
t = warn + bibdocfile_templates.tmpl_filelist(
ln=ln,
filelist=filelist)
# Build the detailed-record tab bar that frames the file list.
cc = guess_primary_collection_of_a_record(self.recid)
unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid, ln)
ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
link_ln = ''
if ln != CFG_SITE_LANG:
link_ln = '?ln=%s' % ln
tabs = [(unordered_tabs[tab_id]['label'], \
'%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln), \
tab_id == 'files',
unordered_tabs[tab_id]['enabled']) \
for (tab_id, dummy_order) in ordered_tabs_id
if unordered_tabs[tab_id]['visible'] == True]
top = webstyle_templates.detailed_record_container_top(self.recid,
tabs,
args['ln'])
bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
tabs,
args['ln'])
title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln'])
# Assemble the full page: header + tab container + file list + footer.
return pageheaderonly(title=title,
navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \
''' &gt; <a class="navtrail" href="%s/%s/%s">%s</a>
&gt; %s''' % \
(CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")),
description=description,
keywords=keywords,
uid=uid,
language=ln,
req=req,
navmenuid='search',
navtrail_append_title_p=0) + \
websearch_templates.tmpl_search_pagestart(ln) + \
top + t + bottom + \
websearch_templates.tmpl_search_pageend(ln) + \
pagefooteronly(language=ln, req=req)
return getfile, []
def __call__(self, req, form):
"""Called in case of URLs like /CFG_SITE_RECORD/123/files without
trailing slash.
"""
args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd)
ln = args['ln']
link_ln = ''
if ln != CFG_SITE_LANG:
link_ln = '?ln=%s' % ln
return redirect_to_url(req, '%s/%s/%s/files/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, link_ln))
def bibdocfile_legacy_getfile(req, form):
    """Handle legacy /getfile.py URLs.

    Washes the legacy query arguments and issues a permanent redirect to
    the modern /<CFG_SITE_RECORD>/<recid>/files/<name><format> URL.

    @param req: the request object.
    @param form: the submitted form arguments.
    @return: the result of the redirect, or a warning page on failure.
    """
    args = wash_urlargd(form, {
        'recid': (int, 0),
        'docid': (int, 0),
        'version': (str, ''),
        'name': (str, ''),
        'format': (str, ''),
        'ln' : (str, CFG_SITE_LANG)
        })
    _ = gettext_set_language(args['ln'])

    def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
        # Resolve the recid and the docname from whichever identifiers the
        # legacy URL supplied, then redirect to the canonical files URL.
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.bibrec_links[0]["recid"]
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
            else:
                return warning_page(_('Not enough information to retrieve the document'), req, ln)
        else:
            brd = BibRecDocs(recid)
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    name = brd.get_docname(docid)
                except InvenioBibDocFileError:
                    # BUGFIX: grammar of the user-facing message
                    # ("in trying to retrieving" -> "in trying to retrieve").
                    return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
        docformat = normalize_format(docformat)
        # BUGFIX: the optional version parameter must be joined with '&';
        # the query string already starts with '?ln=...', so the previous
        # 'version=%s' produced '?ln=enversion=2'.
        redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version and '&version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY)

    # BUGFIX: the washed arguments carry the key 'format', but _getfile_py
    # names the parameter 'docformat' (renamed so as not to shadow the
    # builtin); map the key before unpacking, otherwise the call raises
    # TypeError: unexpected keyword argument 'format'.
    args['docformat'] = args.pop('format')
    return _getfile_py(req, **args)
# --------------------------------------------------
class WebInterfaceManageDocFilesPages(WebInterfaceDirectory):
    """Admin web pages to manage the files attached to a record.

    Exposes the interactive "Document File Manager" (``managedocfiles``)
    and its AJAX upload endpoint (``managedocfilesasync``).
    """

    _exports = ['', 'managedocfiles', 'managedocfilesasync']

    def managedocfiles(self, req, form):
        """
        Display admin interface to manage files of a record.

        Depending on the washed arguments the same URL serves three
        states: record selection form (no recid), the file-management
        interface (recid given, do == 0), and the apply/cancel step
        (do != 0 or cancel pressed).
        """
        argd = wash_urlargd(form, {
            'ln': (str, ''),
            'access': (str, ''),
            'recid': (int, None),
            'do': (int, 0),
            'cancel': (str, None),
            })
        _ = gettext_set_language(argd['ln'])
        uid = getUid(req)
        user_info = collect_user_info(req)
        # Check authorization
        (auth_code, auth_msg) = acc_authorize_action(req,
                                                     'runbibdocfile')
        if auth_code and user_info['email'] == 'guest':
            # Ask to login: redirect guests to the login page, keeping
            # the current URI as referer so they come back afterwards.
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'ln': argd['ln'],
                                        'referer': CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target)
        elif auth_code:
            # Logged-in but not authorized.
            return page_not_authorized(req, referer="/%s/managedocfiles" % CFG_SITE_RECORD,
                                       uid=uid, text=auth_msg,
                                       ln=argd['ln'],
                                       navmenuid="admin")
        # Prepare navtrail
        navtrail = '''<a class="navtrail" href="%(CFG_SITE_URL)s/help/admin">Admin Area</a> &gt; %(manage_files)s''' \
            % {'CFG_SITE_URL': CFG_SITE_URL,
               'manage_files': _("Manage Document Files")}
        body = ''
        if argd['do'] != 0 and not argd['cancel']:
            # Apply modifications: move the files staged in the
            # session-specific working directory into permanent storage.
            working_dir = os.path.join(CFG_TMPSHAREDDIR,
                                       'websubmit_upload_interface_config_' + str(uid),
                                       argd['access'])
            move_uploaded_files_to_storage(working_dir=working_dir,
                                           recid=argd['recid'],
                                           icon_sizes=['180>', '700>'],
                                           create_icon_doctypes=['*'],
                                           force_file_revision=False)
            # Clean temporary directory
            shutil.rmtree(working_dir)
            # Confirm modifications
            body += '<p style="color:#0f0">%s</p>' % \
                (_('Your modifications to record #%i have been submitted') % argd['recid'])
        elif argd['cancel']:
            # Clean temporary directory: discard all staged changes.
            working_dir = os.path.join(CFG_TMPSHAREDDIR,
                                       'websubmit_upload_interface_config_' + str(uid),
                                       argd['access'])
            shutil.rmtree(working_dir)
            body += '<p style="color:#c00">%s</p>' % \
                (_('Your modifications to record #%i have been cancelled') % argd['recid'])
        if not argd['recid'] or argd['do'] != 0:
            # No record chosen yet (or changes just applied): offer the
            # record-selection form.
            body += '''
        <form method="post" action="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/managedocfiles">
        <label for="recid">%(edit_record)s:</label>
        <input type="text" name="recid" id="recid" />
        <input type="submit" value="%(edit)s" class="adminbutton" />
        </form>
        ''' % {'edit': _('Edit'),
               'edit_record': _('Edit record'),
               'CFG_SITE_URL': CFG_SITE_URL,
               'CFG_SITE_RECORD': CFG_SITE_RECORD}
        # Fresh access token naming the per-session working directory.
        access = time.strftime('%Y%m%d_%H%M%S')
        if argd['recid'] and argd['do'] == 0:
            # Displaying interface to manage files
            # Prepare navtrail
            title, dummy_description, dummy_keywords = websearch_templates.tmpl_record_page_header_content(req, argd['recid'],
                                                                                                           argd['ln'])
            navtrail = '''<a class="navtrail" href="%(CFG_SITE_URL)s/help/admin">Admin Area</a> &gt;
        <a class="navtrail" href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/managedocfiles">%(manage_files)s</a> &gt;
        %(record)s: %(title)s
        ''' \
                % {'CFG_SITE_URL': CFG_SITE_URL,
                   'title': title,
                   'manage_files': _("Document File Manager"),
                   'record': _("Record #%i") % argd['recid'],
                   'CFG_SITE_RECORD': CFG_SITE_RECORD}
            # create_file_upload_interface returns a tuple; element [1]
            # is the HTML of the upload interface.
            body += create_file_upload_interface(\
                recid=argd['recid'],
                ln=argd['ln'],
                uid=uid,
                sbm_access=access,
                display_hidden_files=True,
                restrictions_and_desc=CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS,
                doctypes_and_desc=CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES,
                **CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC)[1]
            # Apply/cancel form posting back to this very page with do=1.
            body += '''<br />
        <form method="post" action="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/managedocfiles">
        <input type="hidden" name="recid" value="%(recid)s" />
        <input type="hidden" name="do" value="1" />
        <input type="hidden" name="access" value="%(access)s" />
        <input type="hidden" name="ln" value="%(ln)s" />
        <div style="font-size:small">
        <input type="submit" name="cancel" value="%(cancel_changes)s" />
        <input type="submit" onclick="user_must_confirm_before_leaving_page=false;return true;" class="adminbutton" name="submit" id="applyChanges" value="%(apply_changes)s" />
        </div></form>''' % \
                {'apply_changes': _("Apply changes"),
                 'cancel_changes': _("Cancel all changes"),
                 'recid': argd['recid'],
                 'access': access,
                 'ln': argd['ln'],
                 'CFG_SITE_URL': CFG_SITE_URL,
                 'CFG_SITE_RECORD': CFG_SITE_RECORD}
            # Warn the user before navigating away with unsaved changes.
            body += websubmit_templates.tmpl_page_do_not_leave_submission_js(argd['ln'], enabled=True)
        return page(title = _("Document File Manager") + (argd['recid'] and (': ' + _("Record #%i") % argd['recid']) or ''),
                    navtrail=navtrail,
                    navtrail_append_title_p=0,
                    metaheaderadd = get_upload_file_interface_javascript(form_url_params='?access='+access) + \
                                    get_upload_file_interface_css(),
                    body = body,
                    uid = uid,
                    language=argd['ln'],
                    req=req,
                    navmenuid='admin')

    def managedocfilesasync(self, req, form):
        "Upload file and returns upload interface"
        argd = wash_urlargd(form, {
            'ln': (str, ''),
            'recid': (int, 1),
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
            })
        user_info = collect_user_info(req)
        include_headers = False
        # User submitted either through WebSubmit, or admin interface.
        if form.has_key('doctype') and form.has_key('indir') \
               and form.has_key('access'):
            # Submitted through WebSubmit. Check rights
            include_headers = True
            working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                                       argd['indir'], argd['doctype'],
                                       argd['access'])
            try:
                # Reject path-traversal attempts in indir/doctype/access:
                # the joined path must already be in canonical form.
                assert(working_dir == os.path.abspath(working_dir))
            except AssertionError:
                raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
            try:
                # Retrieve recid from working_dir, safer.
                recid_fd = file(os.path.join(working_dir, 'SN'))
                recid = int(recid_fd.read())
                recid_fd.close()
            except:
                recid = ""
            try:
                # The 'act' file holds the WebSubmit action name.
                act_fd = file(os.path.join(working_dir, 'act'))
                action = act_fd.read()
                act_fd.close()
            except:
                action = ""
            # Is user authorized to perform this action?
            auth_code = acc_authorize_action(user_info,
                                             "submit",
                                             authorized_if_no_roles=not isGuestUser(getUid(req)),
                                             doctype=argd['doctype'],
                                             act=action)[0]
            if auth_code and not acc_is_role("submit", doctype=argd['doctype'], act=action):
                # There is NO authorization plugged. User should have access
                auth_code = 0
        else:
            # User must be allowed to attach files
            auth_code = acc_authorize_action(user_info, 'runbibdocfile')[0]
            recid = argd['recid']
        if auth_code:
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        # Element [1] of the returned tuple is the interface HTML.
        return create_file_upload_interface(recid=recid,
                                            ln=argd['ln'],
                                            print_outside_form_tag=False,
                                            print_envelope=False,
                                            form=form,
                                            include_headers=include_headers,
                                            sbm_indir=argd['indir'],
                                            sbm_access=argd['access'],
                                            sbm_doctype=argd['doctype'],
                                            uid=user_info['uid'])[1]

    # Visiting the directory root behaves like managedocfiles.
    __call__ = managedocfiles
diff --git a/invenio/legacy/bibedit/engine.py b/invenio/legacy/bibedit/engine.py
index 74ef15b3a..605bd5878 100644
--- a/invenio/legacy/bibedit/engine.py
+++ b/invenio/legacy/bibedit/engine.py
@@ -1,1679 +1,1680 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=C0103
"""Invenio BibEdit Engine."""
__revision__ = "$Id"
from datetime import datetime
import re
import difflib
import zlib
import copy
import urllib
import urllib2
import cookielib
+import json
from invenio.modules import formatter as bibformat
-from invenio.utils.json import json, CFG_JSON_AVAILABLE
+from invenio.utils.json import CFG_JSON_AVAILABLE
from invenio.utils.url import auto_version_url
from invenio.legacy.bibrecord.scripts.xmlmarc2textmarc import create_marc_record
from invenio.bibedit_config import CFG_BIBEDIT_AJAX_RESULT_CODES, \
CFG_BIBEDIT_JS_CHECK_SCROLL_INTERVAL, CFG_BIBEDIT_JS_HASH_CHECK_INTERVAL, \
CFG_BIBEDIT_JS_CLONED_RECORD_COLOR, \
CFG_BIBEDIT_JS_CLONED_RECORD_COLOR_FADE_DURATION, \
CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR, \
CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR_FADE_DURATION, \
CFG_BIBEDIT_JS_NEW_CONTENT_COLOR, \
CFG_BIBEDIT_JS_NEW_CONTENT_COLOR_FADE_DURATION, \
CFG_BIBEDIT_JS_NEW_CONTENT_HIGHLIGHT_DELAY, \
CFG_BIBEDIT_JS_STATUS_ERROR_TIME, CFG_BIBEDIT_JS_STATUS_INFO_TIME, \
CFG_BIBEDIT_JS_TICKET_REFRESH_DELAY, CFG_BIBEDIT_MAX_SEARCH_RESULTS, \
CFG_BIBEDIT_TAG_FORMAT, CFG_BIBEDIT_AJAX_RESULT_CODES_REV, \
CFG_BIBEDIT_AUTOSUGGEST_TAGS, CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS,\
CFG_BIBEDIT_KEYWORD_TAXONOMY, CFG_BIBEDIT_KEYWORD_TAG, \
CFG_BIBEDIT_KEYWORD_RDFLABEL, CFG_BIBEDIT_REQUESTS_UNTIL_SAVE, \
CFG_BIBEDIT_DOI_LOOKUP_FIELD, CFG_DOI_USER_AGENT, \
CFG_BIBEDIT_DISPLAY_REFERENCE_TAGS, CFG_BIBEDIT_DISPLAY_AUTHOR_TAGS
from invenio.config import CFG_SITE_LANG, CFG_DEVEL_SITE
from invenio.bibedit_dblayer import get_name_tags_all, reserve_record_id, \
get_related_hp_changesets, get_hp_update_xml, delete_hp_change, \
get_record_last_modification_date, get_record_revision_author, \
get_marcxml_of_record_revision, delete_related_holdingpen_changes, \
get_record_revisions
from invenio.bibedit_utils import cache_exists, cache_expired, \
create_cache_file, delete_cache_file, get_bibrecord, \
get_cache_file_contents, get_cache_mtime, get_record_templates, \
get_record_template, latest_record_revision, record_locked_by_other_user, \
record_locked_by_queue, save_xml_record, touch_cache_file, \
update_cache_file_contents, get_field_templates, get_marcxml_of_revision, \
revision_to_timestamp, timestamp_to_revision, \
get_record_revision_timestamps, record_revision_exists, \
can_record_have_physical_copies, extend_record_with_template, \
replace_references, merge_record_with_template, record_xml_output, \
record_is_conference, add_record_cnum, get_xml_from_textmarc, \
record_locked_by_user_details, crossref_process_template, \
modify_record_timestamp
from invenio.legacy.bibrecord import create_record, print_rec, record_add_field, \
record_add_subfield_into, record_delete_field, \
record_delete_subfield_from, \
record_modify_subfield, record_move_subfield, \
create_field, record_replace_field, record_move_fields, \
record_modify_controlfield, record_get_field_values, \
record_get_subfields, record_get_field_instances, record_add_fields, \
record_strip_empty_fields, record_strip_empty_volatile_subfields, \
record_strip_controlfields, record_order_subfields, field_xml_output
from invenio.config import CFG_BIBEDIT_PROTECTED_FIELDS, CFG_CERN_SITE, \
CFG_SITE_URL, CFG_SITE_RECORD, CFG_BIBEDIT_KB_SUBJECTS, \
CFG_BIBEDIT_KB_INSTITUTIONS, CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS, \
CFG_INSPIRE_SITE
from invenio.search_engine import record_exists, perform_request_search
from invenio.webuser import session_param_get, session_param_set
from invenio.bibcatalog import bibcatalog_system
from invenio.webpage import page
from invenio.htmlutils import get_mathjax_header
from invenio.utils.text import wash_for_xml, show_diff
from invenio.bibknowledge import get_kbd_values_for_bibedit, get_kbr_values, \
get_kbt_items_for_bibedit, kb_exists
from invenio.batchuploader_engine import perform_upload_check
from invenio.bibcirculation_dblayer import get_number_copies, has_copies
from invenio.bibcirculation_utils import create_item_details_url
from invenio.refextract_api import FullTextNotAvailable
from invenio.legacy.bibrecord.scripts import xmlmarc2textmarc as xmlmarc2textmarc
from invenio.bibdocfile import BibRecDocs, InvenioBibDocFileError
from invenio.crossrefutils import get_marcxml_for_doi, CrossrefError
import invenio.template
# Template object providing the HTML snippets used by the BibEdit pages.
bibedit_templates = invenio.template.load('bibedit')

# Splits a 14-digit revision identifier (YYYYMMDDhhmmss) into its six
# date/time components.
re_revdate_split = re.compile('^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)')
def get_empty_fields_templates():
    """Return template descriptors for the two kinds of empty fields.

    The list contains one empty data field followed by one empty
    control field, each shaped like a regular field descriptor.
    """
    empty_datafield = {
        "name": "Empty field",
        "description": "The data field not containing any information filled in",
        "tag": "",
        "ind1": "",
        "ind2": "",
        "subfields": [("", "")],
        "isControlfield": False,
    }
    empty_controlfield = {
        "name": "Empty control field",
        "description": "The controlfield not containing any data or tag description",
        "isControlfield": True,
        "tag": "",
        "value": "",
    }
    return [empty_datafield, empty_controlfield]
def get_available_fields_templates():
    """Return descriptors for all available field templates.

    Each descriptor mirrors the structure of a full field descriptor
    inside a record; the two generic empty-field templates always come
    first in the returned list.
    """
    result = get_empty_fields_templates()
    for tpl in get_field_templates():
        tag = tpl[3].keys()[0]
        field = tpl[3][tag][0]
        if field[0] == []:
            # No subfield list: the template describes a control field,
            # which carries a flat value instead of subfields.
            descriptor = {
                "name": tpl[1],
                "description": tpl[2],
                "isControlfield": True,
                "tag": tag,
                "value": field[3],
            }
        else:
            descriptor = {
                "name": tpl[1],
                "description": tpl[2],
                "tag": tag,
                "ind1": field[1],
                "ind2": field[2],
                "subfields": field[0],
                "isControlfield": False,
            }
        result.append(descriptor)
    return result
def perform_request_init(uid, ln, req, lastupdated):
    """Handle the initial request by adding menu and JavaScript to the page.

    Returns a (body, errors, warnings) tuple, except in the degraded
    case where JSON support is missing, where a full rendered page is
    returned instead.
    """
    errors = []
    warnings = []
    body = ''
    # Add script data.
    record_templates = get_record_templates()
    record_templates.sort()
    tag_names = get_name_tags_all()
    protected_fields = ['001']
    protected_fields.extend(CFG_BIBEDIT_PROTECTED_FIELDS.split(','))
    cern_site = 'false'
    if not CFG_JSON_AVAILABLE:
        # The editor is entirely AJAX-driven; without JSON support it
        # cannot work at all, so bail out with an explanatory page.
        title = 'Record Editor'
        body = '''Sorry, the record editor cannot operate when the
                `simplejson' module is not installed. Please see the INSTALL
                file.'''
        return page(title = title,
                    body = body,
                    errors = [],
                    warnings = [],
                    uid = uid,
                    language = ln,
                    navtrail = "",
                    lastupdated = lastupdated,
                    req = req)
    body += '<link rel="stylesheet" type="text/css" href="/img/jquery-ui.css" />'
    body += '<link rel="stylesheet" type="text/css" href="%s/%s" />' % (CFG_SITE_URL,
                                                                       auto_version_url("img/" + 'bibedit.css'))
    if CFG_CERN_SITE:
        cern_site = 'true'
    # These values are written verbatim into JavaScript `var` statements
    # below, hence the string values pre-wrapped in '"' quotes.
    data = {'gRECORD_TEMPLATES': record_templates,
            'gTAG_NAMES': tag_names,
            'gPROTECTED_FIELDS': protected_fields,
            'gSITE_URL': '"' + CFG_SITE_URL + '"',
            'gSITE_RECORD': '"' + CFG_SITE_RECORD + '"',
            'gCERN_SITE': cern_site,
            'gHASH_CHECK_INTERVAL': CFG_BIBEDIT_JS_HASH_CHECK_INTERVAL,
            'gCHECK_SCROLL_INTERVAL': CFG_BIBEDIT_JS_CHECK_SCROLL_INTERVAL,
            'gSTATUS_ERROR_TIME': CFG_BIBEDIT_JS_STATUS_ERROR_TIME,
            'gSTATUS_INFO_TIME': CFG_BIBEDIT_JS_STATUS_INFO_TIME,
            'gCLONED_RECORD_COLOR':
                '"' + CFG_BIBEDIT_JS_CLONED_RECORD_COLOR + '"',
            'gCLONED_RECORD_COLOR_FADE_DURATION':
                CFG_BIBEDIT_JS_CLONED_RECORD_COLOR_FADE_DURATION,
            'gNEW_ADD_FIELD_FORM_COLOR':
                '"' + CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR + '"',
            'gNEW_ADD_FIELD_FORM_COLOR_FADE_DURATION':
                CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR_FADE_DURATION,
            'gNEW_CONTENT_COLOR': '"' + CFG_BIBEDIT_JS_NEW_CONTENT_COLOR + '"',
            'gNEW_CONTENT_COLOR_FADE_DURATION':
                CFG_BIBEDIT_JS_NEW_CONTENT_COLOR_FADE_DURATION,
            'gNEW_CONTENT_HIGHLIGHT_DELAY':
                CFG_BIBEDIT_JS_NEW_CONTENT_HIGHLIGHT_DELAY,
            'gTICKET_REFRESH_DELAY': CFG_BIBEDIT_JS_TICKET_REFRESH_DELAY,
            'gRESULT_CODES': CFG_BIBEDIT_AJAX_RESULT_CODES,
            'gAUTOSUGGEST_TAGS' : CFG_BIBEDIT_AUTOSUGGEST_TAGS,
            'gAUTOCOMPLETE_TAGS' : CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS.keys(),
            'gKEYWORD_TAG' : '"' + CFG_BIBEDIT_KEYWORD_TAG + '"',
            'gREQUESTS_UNTIL_SAVE' : CFG_BIBEDIT_REQUESTS_UNTIL_SAVE,
            'gAVAILABLE_KBS': get_available_kbs(),
            'gTagsToAutocomplete': CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS,
            'gDOILookupField': '"' + CFG_BIBEDIT_DOI_LOOKUP_FIELD + '"',
            'gDisplayReferenceTags': CFG_BIBEDIT_DISPLAY_REFERENCE_TAGS,
            'gDisplayAuthorTags': CFG_BIBEDIT_DISPLAY_AUTHOR_TAGS
            }
    body += '<script type="text/javascript">\n'
    for key in data:
        body += '    var %s = %s;\n' % (key, data[key])
    body += '    </script>\n'
    # Adding the information about field templates
    fieldTemplates = get_available_fields_templates()
    body += "<script>\n" + \
            "   var fieldTemplates = %s\n" % (json.dumps(fieldTemplates), ) + \
            "</script>\n"
    # Add scripts (the ordering is NOT irrelevant).
    scripts = ['jquery-ui.min.js', 'jquery.jeditable.mini.js', 'jquery.hotkeys.js',
               'json2.js', 'bibedit_refextract.js', 'bibedit_display.js', 'bibedit_engine.js', 'bibedit_keys.js',
               'bibedit_menu.js', 'bibedit_holdingpen.js', 'marcxml.js',
               'bibedit_clipboard.js']
    for script in scripts:
        body += '    <script type="text/javascript" src="%s/%s">' \
            '</script>\n' % (CFG_SITE_URL, auto_version_url("js/" + script))
    # Init BibEdit
    body += '<script>$(init_bibedit);</script>'
    # Build page structure and menu.
    # rec = create_record(format_record(235, "xm"))[0]
    #oaiId = record_extract_oai_id(rec)
    body += bibedit_templates.menu()
    body += bibedit_templates.focuson()
    body += """<div id="bibEditContent">
               <div class="revisionLine"></div>
               <div id="Toptoolbar"></div>
               <div id="bibEditMessage"></div>
               <div id="bibEditContentTable"></div>
               </div>"""
    return body, errors, warnings
def get_available_kbs():
    """Return the knowledge bases available for use with BibEdit.

    Only KBs that actually exist in the system are included.
    """
    candidates = [CFG_BIBEDIT_KB_INSTITUTIONS, CFG_BIBEDIT_KB_SUBJECTS]
    return [kb_name for kb_name in candidates if kb_exists(kb_name)]
def record_has_pdf(recid):
    """Return True if the record has at least one attached document.

    NOTE(review): despite the name, any attached bibdoc counts, not
    only PDFs -- this mirrors the check actually performed.
    """
    attached_docs = BibRecDocs(recid).list_bibdocs()
    return len(attached_docs) > 0
def get_marcxml_of_revision_id(recid, revid):
    """
    Return the MARCXML string corresponding to revision REVID
    (=RECID.REVDATE) of a record.  Return empty string if the revision
    does not exist.

    :param recid: record id
    :param revid: 14-digit revision timestamp (YYYYMMDDhhmmss)
    """
    # BUGFIX: initialize the result so that a missing revision returns
    # "" as documented, instead of raising UnboundLocalError.
    marcxml = ""
    job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revid).groups()
    tmp_res = get_marcxml_of_record_revision(recid, job_date)
    if tmp_res:
        for row in tmp_res:
            xml = zlib.decompress(row[0]) + "\n"
            # xml contains marcxml of record
            # now we create a record object from this xml and sort fields
            # and subfields, and return marcxml
            rec = create_record(xml)[0]
            record_order_subfields(rec)
            marcxml = record_xml_output(rec, order_fn="_order_by_tags")
    return marcxml
def perform_request_compare(ln, recid, rev1, rev2):
    """Compare two revisions of a record and render the diff box.

    Returns a (body, errors, warnings) tuple; the body holds either the
    comparison HTML or an error message when a revision is missing.
    """
    errors, warnings = [], []
    both_exist = record_revision_exists(recid, rev1) and \
        record_revision_exists(recid, rev2)
    if not both_exist:
        body = "The requested record revision does not exist !"
        return body, errors, warnings
    xml1 = get_marcxml_of_revision_id(recid, rev1)
    xml2 = get_marcxml_of_revision_id(recid, rev2)
    # Textual MARC diffs line-by-line much better than raw MARCXML.
    marc1 = create_marc_record(create_record(xml1)[0], '',
                               {"text-marc": 1, "aleph-marc": 0})
    marc2 = create_marc_record(create_record(xml2)[0], '',
                               {"text-marc": 1, "aleph-marc": 0})
    comparison = show_diff(marc1, marc2)
    job_date1 = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(rev1).groups()
    job_date2 = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(rev2).groups()
    body = bibedit_templates.history_comparebox(ln, job_date1,
                                                job_date2, comparison)
    return body, errors, warnings
def perform_request_newticket(recid, uid):
    """Create a new ticket carrying this record's number.

    @param recid: record id
    @param uid: user id
    @return: (error_msg, url) -- error_msg is empty on success
    """
    ticket_url = ""
    error_message = ""
    if bibcatalog_system is None:
        error_message = "No ticket system configured"
    else:
        ticket_id = bibcatalog_system.ticket_submit(uid, "", recid, "")
        if not ticket_id:
            error_message = "ticket_submit failed"
        else:
            # Fetch the URL through which the new ticket can be modified.
            ticket_url = bibcatalog_system.ticket_get_attribute(
                uid, ticket_id, 'url_modify')
    return (error_message, ticket_url)
def perform_request_ajax(req, recid, uid, data, isBulk = False):
    """Handle Ajax requests by redirecting to appropriate function.

    Dispatches on data['requestType'] and returns the handler's response
    dictionary.  The optional 'undoRedo' entry is forwarded only to the
    record-update handlers.
    """
    response = {}
    request_type = data['requestType']
    undo_redo = None
    if data.has_key("undoRedo"):
        undo_redo = data["undoRedo"]
    # Call function based on request type.
    if request_type == 'searchForRecord':
        # Search request.
        response.update(perform_request_bibedit_search(data, req))
    elif request_type in ['changeTagFormat']:
        # User related requests.
        response.update(perform_request_user(req, request_type, recid, data))
    elif request_type in ('getRecord', 'submit', 'cancel', 'newRecord',
        'deleteRecord', 'deleteRecordCache', 'prepareRecordMerge', 'revert',
        'updateCacheRef', 'submittextmarc'):
        # 'Major' record related requests.
        response.update(perform_request_record(req, request_type, recid, uid,
                                               data))
    elif request_type in ('addField', 'addSubfields', \
                          'addFieldsSubfieldsOnPositions', 'modifyContent', \
                          'modifySubfieldTag', 'modifyFieldTag', \
                          'moveSubfield', 'deleteFields', 'moveField', \
                          'modifyField', 'otherUpdateRequest', \
                          'disableHpChange', 'deactivateHoldingPenChangeset'):
        # Record updates.  These need the client's cache mtime for
        # conflict detection, plus any holding-pen changes in flight.
        cacheMTime = data['cacheMTime']
        if data.has_key('hpChanges'):
            hpChanges = data['hpChanges']
        else:
            hpChanges = {}
        response.update(perform_request_update_record(request_type, recid, \
                                                      uid, cacheMTime, data, \
                                                      hpChanges, undo_redo, \
                                                      isBulk))
    elif request_type in ('autosuggest', 'autocomplete', 'autokeyword'):
        response.update(perform_request_autocomplete(request_type, recid, uid, \
                                                     data))
    elif request_type in ('getTickets', ):
        # BibCatalog requests.
        response.update(perform_request_bibcatalog(request_type, recid, uid))
    elif request_type in ('getHoldingPenUpdates', ):
        response.update(perform_request_holdingpen(request_type, recid))
    elif request_type in ('getHoldingPenUpdateDetails', \
                          'deleteHoldingPenChangeset'):
        updateId = data['changesetNumber']
        response.update(perform_request_holdingpen(request_type, recid, \
                                                   updateId))
    elif request_type in ('applyBulkUpdates', ):
        # a general version of a bulk request
        changes = data['requestsData']
        cacheMTime = data['cacheMTime']
        response.update(perform_bulk_request_ajax(req, recid, uid, changes, \
                                                  undo_redo, cacheMTime))
    elif request_type in ('preview', ):
        response.update(perform_request_preview_record(request_type, recid, uid, data))
    elif request_type in ('get_pdf_url', ):
        response.update(perform_request_get_pdf_url(recid))
    elif request_type in ('refextract', ):
        # Reference extraction, either from supplied text or the record.
        txt = None
        if data.has_key('txt'):
            txt = data["txt"]
        response.update(perform_request_ref_extract(recid, uid, txt))
    elif request_type in ('refextracturl', ):
        response.update(perform_request_ref_extract_url(recid, uid, data['url']))
    elif request_type == 'getTextMarc':
        response.update(perform_request_get_textmarc(recid, uid))
    elif request_type == "getTableView":
        response.update(perform_request_get_tableview(recid, uid, data))
    elif request_type == "DOISearch":
        response.update(perform_doi_search(data['doi']))
    return response
def perform_bulk_request_ajax(req, recid, uid, reqsData, undoRedo, cacheMTime):
    """An AJAX handler used when treating bulk updates.

    Executes the requests in order, threading the cache mtime returned
    by each request into the next one, and returns the last response.
    """
    last_response = {}
    current_mtime = cacheMTime
    for position, data in enumerate(reqsData):
        assert data != None
        data['cacheMTime'] = current_mtime
        if position == 0 and undoRedo != None:
            # we add the undo/redo handler to the first operation in order
            # to save the handler on the server side !
            data['undoRedo'] = undoRedo
        last_response = perform_request_ajax(req, recid, uid, data,
                                             isBulk=True)
        # now we have to update the cacheMTime for the next request !
        try:
            current_mtime = last_response['cacheMTime']
        except:
            raise Exception(str(last_response))
    return last_response
def perform_request_bibedit_search(data, req):
    """Handle a search request coming from the BibEdit UI.

    Builds a '<field>:<pattern>' query unless searching anywhere, and
    returns at most CFG_BIBEDIT_MAX_SEARCH_RESULTS record ids.
    """
    search_type = data['searchType']
    if search_type is None:
        search_type = "anywhere"
    search_pattern = data['searchPattern']
    if search_type == 'anywhere':
        query = search_pattern
    else:
        query = search_type + ':' + search_pattern
    hits = list(perform_request_search(req=req, p=query))
    return {'resultCode': 1,
            'resultSet': hits[0:CFG_BIBEDIT_MAX_SEARCH_RESULTS]}
def perform_request_user(req, request_type, recid, data):
    """Handle user-preference related requests."""
    response = {}
    if request_type == 'changeTagFormat':
        # Persist the per-record tag format choice in the user session.
        settings = session_param_get(req, 'bibedit_tagformat', {})
        settings[recid] = data['tagFormat']
        session_param_set(req, 'bibedit_tagformat', settings)
        response['resultCode'] = 2
    return response
def perform_request_holdingpen(request_type, recId, changeId=None):
    """Perform a holding-pen AJAX request.

    Supported request types::
        -getHoldingPenUpdates: pending holding-pen updates for a record
        -getHoldingPenUpdateDetails: content of one changeset
        -deleteHoldingPenChangeset: remove one changeset
    """
    response = {}
    if request_type == 'getHoldingPenUpdates':
        response["changes"] = [
            (str(entry[0]), str(entry[1]))
            for entry in get_related_hp_changesets(recId)]
    elif request_type == 'getHoldingPenUpdateDetails':
        # Return the record carried by the holding-pen update, in the
        # format produced by the record-difference tool.
        assert changeId != None
        hp_content = get_hp_update_xml(changeId)
        hp_record = create_record(hp_content[0], "xm")[0]
        # order subfields alphabetically
        record_order_subfields(hp_record)
        response['record'] = hp_record
        response['changeset_number'] = changeId
    elif request_type == 'deleteHoldingPenChangeset':
        assert changeId != None
        delete_hp_change(changeId)
    return response
def perform_request_record(req, request_type, recid, uid, data, ln=CFG_SITE_LANG):
"""Handle 'major' record related requests like fetching, submitting or
deleting a record, cancel editing or preparing a record for merging.
"""
response = {}
if request_type == 'newRecord':
# Create a new record.
new_recid = reserve_record_id()
new_type = data['newType']
if new_type == 'empty':
# Create a new empty record.
create_cache_file(recid, uid)
response['resultCode'], response['newRecID'] = 6, new_recid
elif new_type == 'template':
# Create a new record from XML record template.
template_filename = data['templateFilename']
template = get_record_template(template_filename)
if not template:
response['resultCode'] = 108
else:
record = create_record(template)[0]
if not record:
response['resultCode'] = 109
else:
record_add_field(record, '001',
controlfield_value=str(new_recid))
create_cache_file(new_recid, uid, record, True)
response['resultCode'], response['newRecID'] = 7, new_recid
elif new_type == 'import':
# Import data from external source, using DOI
doi = data['doi']
if not doi:
response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_no_doi_specified']
else:
try:
marcxml_template = get_marcxml_for_doi(doi)
except CrossrefError, inst:
response['resultCode'] = \
CFG_BIBEDIT_AJAX_RESULT_CODES_REV[inst.code]
except:
response['resultCode'] = 0
else:
record = crossref_process_template(marcxml_template, CFG_INSPIRE_SITE)
if not record:
response['resultCode'] = 109
else:
record_add_field(record, '001',
controlfield_value=str(new_recid))
create_cache_file(new_recid, uid, record, True)
response['resultCode'], response['newRecID'] = 7, new_recid
elif new_type == 'clone':
# Clone an existing record (from the users cache).
existing_cache = cache_exists(recid, uid)
if existing_cache:
try:
record = get_cache_file_contents(recid, uid)[2]
except:
# if, for example, the cache format was wrong (outdated)
record = get_bibrecord(recid)
else:
# Cache missing. Fall back to using original version.
record = get_bibrecord(recid)
record_delete_field(record, '001')
record_add_field(record, '001', controlfield_value=str(new_recid))
create_cache_file(new_recid, uid, record, True)
response['resultCode'], response['newRecID'] = 8, new_recid
elif request_type == 'getRecord':
# Fetch the record. Possible error situations:
# - Non-existing record
# - Deleted record
# - Record locked by other user
# - Record locked by queue
# A cache file will be created if it does not exist.
# If the cache is outdated (i.e., not based on the latest DB revision),
# cacheOutdated will be set to True in the response.
record_status = record_exists(recid)
existing_cache = cache_exists(recid, uid)
read_only_mode = False
if data.has_key("inReadOnlyMode"):
read_only_mode = data['inReadOnlyMode']
if record_status == 0:
response['resultCode'] = 102
elif not read_only_mode and not existing_cache and \
record_locked_by_other_user(recid, uid):
name, email, locked_since = record_locked_by_user_details(recid, uid)
response['locked_details'] = {'name': name,
'email': email,
'locked_since': locked_since}
response['resultCode'] = 104
elif not read_only_mode and existing_cache and \
cache_expired(recid, uid) and \
record_locked_by_other_user(recid, uid):
response['resultCode'] = 104
elif not read_only_mode and record_locked_by_queue(recid):
response['resultCode'] = 105
else:
if data.get('deleteRecordCache'):
delete_cache_file(recid, uid)
existing_cache = False
pending_changes = []
disabled_hp_changes = {}
if read_only_mode:
if data.has_key('recordRevision') and data['recordRevision'] != 'sampleValue':
record_revision_ts = data['recordRevision']
record_xml = get_marcxml_of_revision(recid, \
record_revision_ts)
record = create_record(record_xml)[0]
record_revision = timestamp_to_revision(record_revision_ts)
pending_changes = []
disabled_hp_changes = {}
else:
# a normal cacheless retrieval of a record
record = get_bibrecord(recid)
record_revision = get_record_last_modification_date(recid)
if record_revision == None:
record_revision = datetime.now().timetuple()
pending_changes = []
disabled_hp_changes = {}
cache_dirty = False
mtime = 0
undo_list = []
redo_list = []
elif not existing_cache:
record_revision, record = create_cache_file(recid, uid)
mtime = get_cache_mtime(recid, uid)
pending_changes = []
disabled_hp_changes = {}
undo_list = []
redo_list = []
cache_dirty = False
else:
#TODO: This try except should be replaced with something nicer,
# like an argument indicating if a new cache file is to
# be created
try:
cache_dirty, record_revision, record, pending_changes, \
disabled_hp_changes, undo_list, redo_list = \
get_cache_file_contents(recid, uid)
touch_cache_file(recid, uid)
mtime = get_cache_mtime(recid, uid)
if not latest_record_revision(recid, record_revision) and \
get_record_revisions(recid) != ():
# This sould prevent from using old cache in case of
# viewing old version. If there are no revisions,
# it means we should skip this step because this
# is a new record
response['cacheOutdated'] = True
except:
record_revision, record = create_cache_file(recid, uid)
mtime = get_cache_mtime(recid, uid)
pending_changes = []
disabled_hp_changes = {}
cache_dirty = False
undo_list = []
redo_list = []
if data.get('clonedRecord',''):
response['resultCode'] = 9
else:
response['resultCode'] = 3
revision_author = get_record_revision_author(recid, record_revision)
latest_revision = get_record_last_modification_date(recid)
if latest_revision == None:
latest_revision = datetime.now().timetuple()
last_revision_ts = revision_to_timestamp(latest_revision)
revisions_history = get_record_revision_timestamps(recid)
number_of_physical_copies = get_number_copies(recid)
bibcirc_details_URL = create_item_details_url(recid, ln)
can_have_copies = can_record_have_physical_copies(recid)
# For some collections, merge template with record
template_to_merge = extend_record_with_template(recid)
if template_to_merge:
merged_record = merge_record_with_template(record, template_to_merge)
if merged_record:
record = merged_record
create_cache_file(recid, uid, record, True)
if record_status == -1:
# The record was deleted
response['resultCode'] = 103
response['record_has_pdf'] = record_has_pdf(recid)
# order subfields alphabetically
record_order_subfields(record)
response['cacheDirty'], response['record'], \
response['cacheMTime'], response['recordRevision'], \
response['revisionAuthor'], response['lastRevision'], \
response['revisionsHistory'], response['inReadOnlyMode'], \
response['pendingHpChanges'], response['disabledHpChanges'], \
response['undoList'], response['redoList'] = cache_dirty, \
record, mtime, revision_to_timestamp(record_revision), \
revision_author, last_revision_ts, revisions_history, \
read_only_mode, pending_changes, disabled_hp_changes, \
undo_list, redo_list
response['numberOfCopies'] = number_of_physical_copies
response['bibCirculationUrl'] = bibcirc_details_URL
response['canRecordHavePhysicalCopies'] = can_have_copies
# Set tag format from user's session settings.
tagformat_settings = session_param_get(req, 'bibedit_tagformat')
tagformat = (tagformat_settings is not None) and tagformat_settings.get(recid, CFG_BIBEDIT_TAG_FORMAT) or CFG_BIBEDIT_TAG_FORMAT
response['tagFormat'] = tagformat
# KB information
response['KBSubject'] = CFG_BIBEDIT_KB_SUBJECTS
response['KBInstitution'] = CFG_BIBEDIT_KB_INSTITUTIONS
elif request_type == 'submit':
# Submit the record. Possible error situations:
# - Missing cache file
# - Cache file modified in other editor
# - Record locked by other user
# - Record locked by queue
# If the cache is outdated cacheOutdated will be set to True in the
# response.
if not cache_exists(recid, uid):
response['resultCode'] = 106
elif not get_cache_mtime(recid, uid) == data['cacheMTime']:
response['resultCode'] = 107
elif cache_expired(recid, uid) and \
record_locked_by_other_user(recid, uid):
response['resultCode'] = 104
elif record_locked_by_queue(recid):
response['resultCode'] = 105
else:
try:
tmp_result = get_cache_file_contents(recid, uid)
record_revision = tmp_result[1]
record = tmp_result[2]
pending_changes = tmp_result[3]
# disabled_changes = tmp_result[4]
xml_record = wash_for_xml(print_rec(record))
record, status_code, list_of_errors = create_record(xml_record)
# Simulate upload to catch errors
errors_upload = perform_upload_check(xml_record, '--replace')
if errors_upload:
response['resultCode'], response['errors'] = 113, \
errors_upload
return response
elif status_code == 0:
response['resultCode'], response['errors'] = 110, \
list_of_errors
if not data['force'] and not latest_record_revision(recid, record_revision):
response['cacheOutdated'] = True
else:
if record_is_conference(record):
new_cnum = add_record_cnum(recid, uid)
if new_cnum:
response["new_cnum"] = new_cnum
save_xml_record(recid, uid)
response['resultCode'] = 4
except Exception, e:
response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
'error_wrong_cache_file_format']
if CFG_DEVEL_SITE: # return debug information in the request
response['exception_message'] = e.__str__()
elif request_type == 'revert':
revId = data['revId']
job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups()
revision_xml = get_marcxml_of_revision(recid, job_date)
# Modify the 005 tag in order to merge with the latest version of record
last_revision_ts = data['lastRevId'] + ".0"
revision_xml = modify_record_timestamp(revision_xml, last_revision_ts)
save_xml_record(recid, uid, revision_xml)
if (cache_exists(recid, uid)):
delete_cache_file(recid, uid)
response['resultCode'] = 4
elif request_type == 'cancel':
# Cancel editing by deleting the cache file. Possible error situations:
# - Cache file modified in other editor
if cache_exists(recid, uid):
if get_cache_mtime(recid, uid) == data['cacheMTime']:
delete_cache_file(recid, uid)
response['resultCode'] = 5
else:
response['resultCode'] = 107
else:
response['resultCode'] = 5
elif request_type == 'deleteRecord':
# Submit the record. Possible error situations:
# - Record locked by other user
# - Record locked by queue
# As the user is requesting deletion we proceed even if the cache file
# is missing and we don't check if the cache is outdated or has
# been modified in another editor.
existing_cache = cache_exists(recid, uid)
pending_changes = []
if has_copies(recid):
response['resultCode'] = \
CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_physical_copies_exist']
elif existing_cache and cache_expired(recid, uid) and \
record_locked_by_other_user(recid, uid):
response['resultCode'] = \
CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_rec_locked_by_user']
elif record_locked_by_queue(recid):
response['resultCode'] = \
CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_rec_locked_by_queue']
else:
if not existing_cache:
record_revision, record, pending_changes, \
deactivated_hp_changes, undo_list, redo_list = \
create_cache_file(recid, uid)
else:
try:
record_revision, record, pending_changes, \
deactivated_hp_changes, undo_list, redo_list = \
get_cache_file_contents(recid, uid)[1:]
except:
record_revision, record, pending_changes, \
deactivated_hp_changes = create_cache_file(recid, uid)
record_add_field(record, '980', ' ', ' ', '', [('c', 'DELETED')])
undo_list = []
redo_list = []
update_cache_file_contents(recid, uid, record_revision, record, \
pending_changes, \
deactivated_hp_changes, undo_list, \
redo_list)
save_xml_record(recid, uid)
delete_related_holdingpen_changes(recid) # we don't need any changes
# related to a deleted record
response['resultCode'] = 10
elif request_type == 'deleteRecordCache':
# Delete the cache file. Ignore the request if the cache has been
# modified in another editor.
if data.has_key('cacheMTime'):
if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == \
data['cacheMTime']:
delete_cache_file(recid, uid)
response['resultCode'] = 11
elif request_type == 'updateCacheRef':
# Update cache with the contents coming from BibEdit JS interface
# Used when updating references using ref extractor
record_revision, record, pending_changes, \
deactivated_hp_changes, undo_list, redo_list = \
get_cache_file_contents(recid, uid)[1:]
record = create_record(data['recXML'])[0]
response['cacheMTime'], response['cacheDirty'] = update_cache_file_contents(recid, uid, record_revision, record, \
pending_changes, \
deactivated_hp_changes, undo_list, \
redo_list), True
response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['cache_updated_with_references']
elif request_type == 'prepareRecordMerge':
# We want to merge the cache with the current DB version of the record,
# so prepare an XML file from the file cache, to be used by BibMerge.
# Possible error situations:
# - Missing cache file
# - Record locked by other user
# - Record locked by queue
# We don't check if cache is outdated (a likely scenario for this
# request) or if it has been modified in another editor.
if not cache_exists(recid, uid):
response['resultCode'] = 106
elif cache_expired(recid, uid) and \
record_locked_by_other_user(recid, uid):
response['resultCode'] = 104
elif record_locked_by_queue(recid):
response['resultCode'] = 105
else:
save_xml_record(recid, uid, to_upload=False, to_merge=True)
response['resultCode'] = 12
elif request_type == 'submittextmarc':
# Textmarc content coming from the user
textmarc_record = data['textmarc']
xml_conversion_status = get_xml_from_textmarc(recid, textmarc_record)
if xml_conversion_status['resultMsg'] == "textmarc_parsing_error":
response.update(xml_conversion_status)
return response
# Simulate upload to catch errors
errors_upload = perform_upload_check(xml_conversion_status['resultXML'], '--replace')
if errors_upload:
response['resultCode'], response['errors'] = 113, \
errors_upload
return response
response.update(xml_conversion_status)
if xml_conversion_status['resultMsg'] == 'textmarc_parsing_success':
create_cache_file(recid, uid,
create_record(response['resultXML'])[0])
save_xml_record(recid, uid)
response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV["record_submitted"]
return response
def perform_request_update_record(request_type, recid, uid, cacheMTime, data, \
                                  hpChanges, undoRedoOp, isBulk=False):
    """
    Handle record update requests like adding, modifying, moving or deleting
    of fields or subfields. Possible common error situations::
     - Missing cache file
     - Cache file modified in other editor

    @param request_type: name of the update operation to perform, e.g.
        'addField', 'addSubfields', 'modifyContent', 'moveField',
        'deleteFields', ...
    @param recid: identifier of the record being edited
    @param uid: identifier of the user performing the edit
    @param cacheMTime: cache modification time known by the client; used to
        detect concurrent modifications from another editor session
    @param data: request payload carrying the operation-specific arguments
    @param hpChanges: dictionary describing Holding Pen changes to apply to
        the cache (keys: "toDisable", "toEnable", "toOverride",
        "changesetsToDeactivate", "changesetsToActivate")
    @param undoRedoOp: Indicates in "undo"/"redo"/undo_descriptor operation is
        performed by a current request.
    @param isBulk: True when this call is one step of a bulk request; skips
        the cacheMTime concurrency check
    @return: dictionary with at least 'resultCode'; on success also
        'cacheMTime' and 'cacheDirty'
    """
    response = {}
    if not cache_exists(recid, uid):
        response['resultCode'] = 106
    elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False:
        # In case of a bulk request, the changes are deliberately performed
        # immediately one after another
        response['resultCode'] = 107
    else:
        try:
            record_revision, record, pending_changes, deactivated_hp_changes, \
                undo_list, redo_list = get_cache_file_contents(recid, uid)[1:]
        except:
            # cache file unreadable or in an unexpected format
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'error_wrong_cache_file_format']
            return response

        # process all the Holding Pen changes operations ... regardless the
        # request type
        if hpChanges.has_key("toDisable"):
            for changeId in hpChanges["toDisable"]:
                pending_changes[changeId]["applied_change"] = True

        if hpChanges.has_key("toEnable"):
            for changeId in hpChanges["toEnable"]:
                pending_changes[changeId]["applied_change"] = False

        if hpChanges.has_key("toOverride"):
            # replace the entire pending-changes list wholesale
            pending_changes = hpChanges["toOverride"]

        if hpChanges.has_key("changesetsToDeactivate"):
            for changesetId in hpChanges["changesetsToDeactivate"]:
                deactivated_hp_changes[changesetId] = True

        if hpChanges.has_key("changesetsToActivate"):
            for changesetId in hpChanges["changesetsToActivate"]:
                deactivated_hp_changes[changesetId] = False

        # processing the undo/redo entries
        if undoRedoOp == "undo":
            try:
                # move the newest undo entry to the front of the redo stack
                redo_list = [undo_list[-1]] + redo_list
                undo_list = undo_list[:-1]
            except:
                raise Exception("An exception occured when undoing previous" + \
                                " operation. Undo list: " + str(undo_list) + \
                                " Redo list " + str(redo_list))
        elif undoRedoOp == "redo":
            try:
                # move the first redo entry back onto the undo stack
                undo_list = undo_list + [redo_list[0]]
                redo_list = redo_list[1:]
            except:
                raise Exception("An exception occured when redoing previous" + \
                                " operation. Undo list: " + str(undo_list) + \
                                " Redo list " + str(redo_list))
        else:
            # This is a genuine operation - we have to add a new descriptor
            # to the undo list and cancel the redo unless the operation is
            # a bulk operation
            if undoRedoOp != None:
                undo_list = undo_list + [undoRedoOp]
                redo_list = []
            else:
                assert isBulk == True

        field_position_local = data.get('fieldPosition')
        if field_position_local is not None:
            field_position_local = int(field_position_local)

        if request_type == 'otherUpdateRequest':
            # An empty request. Might be useful if we want to perform
            # operations that require only the actions performed globally,
            # like modifying the holdingPen changes list
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'editor_modifications_changed']
        elif request_type == 'deactivateHoldingPenChangeset':
            # the changeset has been marked as processed ( user applied it in
            # the editor). Marking as used in the cache file.
            # CAUTION: This function has been implemented here because logically
            #          it fits with the modifications made to the cache file.
            #          No changes are made to the Holding Pen physically. The
            #          changesets are related to the cache because we want to
            #          cancel the removal every time the cache disappears for
            #          any reason
            response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \
                'disabled_hp_changeset']
        elif request_type == 'addField':
            if data['controlfield']:
                record_add_field(record, data['tag'],
                                 controlfield_value=data['value'])
                response['resultCode'] = 20
            else:
                record_add_field(record, data['tag'], data['ind1'],
                                 data['ind2'], subfields=data['subfields'],
                                 field_position_local=field_position_local)
                response['resultCode'] = 21
        elif request_type == 'addSubfields':
            subfields = data['subfields']
            for subfield in subfields:
                record_add_subfield_into(record, data['tag'], subfield[0],
                    subfield[1], subfield_position=None,
                    field_position_local=field_position_local)
            if len(subfields) == 1:
                response['resultCode'] = 22
            else:
                response['resultCode'] = 23
        elif request_type == 'addFieldsSubfieldsOnPositions':
            #1) Sorting the fields by their identifiers
            fieldsToAdd = data['fieldsToAdd']
            subfieldsToAdd = data['subfieldsToAdd']
            for tag in fieldsToAdd.keys():
                positions = fieldsToAdd[tag].keys()
                positions.sort()
                for position in positions:
                    # now adding fields at a position
                    isControlfield = (len(fieldsToAdd[tag][position][0]) == 0)
                    # if there are no subfields, this is a control field
                    if isControlfield:
                        controlfieldValue = fieldsToAdd[tag][position][3]
                        record_add_field(record, tag, field_position_local = \
                                             int(position), \
                                         controlfield_value = \
                                             controlfieldValue)
                    else:
                        subfields = fieldsToAdd[tag][position][0]
                        ind1 = fieldsToAdd[tag][position][1]
                        ind2 = fieldsToAdd[tag][position][2]
                        record_add_field(record, tag, ind1, ind2, subfields = \
                                             subfields, field_position_local = \
                                             int(position))
            # now adding the subfields
            for tag in subfieldsToAdd.keys():
                for fieldPosition in subfieldsToAdd[tag].keys(): #now the fields
                                                          #order not important !
                    subfieldsPositions = subfieldsToAdd[tag][fieldPosition]. \
                        keys()
                    subfieldsPositions.sort()
                    for subfieldPosition in subfieldsPositions:
                        subfield = subfieldsToAdd[tag][fieldPosition]\
                            [subfieldPosition]
                        record_add_subfield_into(record, tag, subfield[0], \
                            subfield[1], \
                            subfield_position = int(subfieldPosition), \
                            field_position_local = int(fieldPosition))
            response['resultCode'] = \
                CFG_BIBEDIT_AJAX_RESULT_CODES_REV['added_positioned_subfields']
        elif request_type == 'modifyField': # changing the field structure
            # first remove subfields and then add new... change the indices
            subfields = data['subFields'] # parse the JSON representation of
                                          # the subfields here
            new_field = create_field(subfields, data['ind1'], data['ind2'])
            record_replace_field(record, data['tag'], new_field, \
                field_position_local = data['fieldPosition'])
            response['resultCode'] = 26
        elif request_type == 'modifyContent':
            if data['subfieldIndex'] != None:
                record_modify_subfield(record, data['tag'],
                    data['subfieldCode'], data['value'],
                    int(data['subfieldIndex']),
                    field_position_local=field_position_local)
            else:
                record_modify_controlfield(record, data['tag'], data["value"],
                    field_position_local=field_position_local)
            response['resultCode'] = 24
        elif request_type == 'modifySubfieldTag':
            # add the subfield under its new code, then drop the old one
            # (which has shifted one position down after the insertion)
            record_add_subfield_into(record, data['tag'], data['subfieldCode'],
                data["value"], subfield_position= int(data['subfieldIndex']),
                field_position_local=field_position_local)
            record_delete_subfield_from(record, data['tag'], int(data['subfieldIndex']) + 1,
                field_position_local=field_position_local)
            response['resultCode'] = 24
        elif request_type == 'modifyFieldTag':
            # re-create the field under the new tag/indicators, then delete
            # the old instance
            subfields = record_get_subfields(record, data['oldTag'],
                field_position_local=field_position_local)
            record_add_field(record, data['newTag'], data['ind1'],
                data['ind2'] , subfields=subfields)
            record_delete_field(record, data['oldTag'], ind1=data['oldInd1'], \
                ind2=data['oldInd2'], field_position_local=field_position_local)
            response['resultCode'] = 32
        elif request_type == 'moveSubfield':
            record_move_subfield(record, data['tag'],
                int(data['subfieldIndex']), int(data['newSubfieldIndex']),
                field_position_local=field_position_local)
            response['resultCode'] = 25
        elif request_type == 'moveField':
            if data['direction'] == 'up':
                final_position_local = field_position_local-1
            else: # direction is 'down'
                final_position_local = field_position_local+1
            record_move_fields(record, data['tag'], [field_position_local],
                final_position_local)
            response['resultCode'] = 32
        elif request_type == 'deleteFields':
            to_delete = data['toDelete']
            deleted_fields = 0
            deleted_subfields = 0
            for tag in to_delete:
                # Sorting the fields in a decreasing order by the local position!
                fieldsOrder = to_delete[tag].keys()
                fieldsOrder.sort(lambda a, b: int(b) - int(a))
                for field_position_local in fieldsOrder:
                    if not to_delete[tag][field_position_local]:
                        # No subfields specified - delete entire field.
                        record_delete_field(record, tag,
                            field_position_local=int(field_position_local))
                        deleted_fields += 1
                    else:
                        for subfield_position in \
                                to_delete[tag][field_position_local][::-1]:
                            # Delete subfields in reverse order (to keep the
                            # indexing correct).
                            record_delete_subfield_from(record, tag,
                                int(subfield_position),
                                field_position_local=int(field_position_local))
                            deleted_subfields += 1
            if deleted_fields == 1 and deleted_subfields == 0:
                response['resultCode'] = 26
            elif deleted_fields and deleted_subfields == 0:
                response['resultCode'] = 27
            elif deleted_subfields == 1 and deleted_fields == 0:
                response['resultCode'] = 28
            elif deleted_subfields and deleted_fields == 0:
                response['resultCode'] = 29
            else:
                response['resultCode'] = 30
        # persist the (possibly) modified state back into the cache file
        response['cacheMTime'], response['cacheDirty'] = \
            update_cache_file_contents(recid, uid, record_revision,
                                       record, \
                                       pending_changes, \
                                       deactivated_hp_changes, \
                                       undo_list, redo_list), \
            True
    return response
def perform_request_autocomplete(request_type, recid, uid, data):
    """
    Perform an AJAX request associated with the retrieval of autocomplete
    data.

    @param request_type: Type of the currently served request, one of
        'autokeyword', 'autosuggest' or 'autocomplete'
    @param recid: the identifer of the record
    @param uid: The identifier of the user being currently logged in
    @param data: The request data containing possibly important additional
                 arguments ('value', 'maintag', 'subtag1', 'subtag2',
                 'subfieldcode')
    @return: response dictionary carrying the matching suggestions
    """
    response = {}
    # get the values based on which one needs to search
    searchby = data['value']
    # we check if the data is properly defined
    fulltag = ''
    if data.has_key('maintag') and data.has_key('subtag1') and \
            data.has_key('subtag2') and data.has_key('subfieldcode'):
        maintag = data['maintag']
        subtag1 = data['subtag1']
        subtag2 = data['subtag2']
        u_subtag1 = subtag1
        u_subtag2 = subtag2
        # empty indicators are encoded as '_' in the full tag
        if (not subtag1) or (subtag1 == ' '):
            u_subtag1 = '_'
        if (not subtag2) or (subtag2 == ' '):
            u_subtag2 = '_'
        subfieldcode = data['subfieldcode']
        fulltag = maintag + u_subtag1 + u_subtag2 + subfieldcode
    if request_type == 'autokeyword':
        # call the keyword-form-ontology function
        if fulltag and searchby:
            items = get_kbt_items_for_bibedit(CFG_BIBEDIT_KEYWORD_TAXONOMY, \
                                              CFG_BIBEDIT_KEYWORD_RDFLABEL, \
                                              searchby)
            response['autokeyword'] = items
    if request_type == 'autosuggest':
        # call knowledge base function to put the suggestions in an array..
        if fulltag and searchby and len(searchby) > 3:
            # add trailing '*' wildcard for 'search_unit_in_bibxxx()' if not
            # already present
            suggest_values = get_kbd_values_for_bibedit(fulltag, "", searchby + "*")
            # only keep the suggestions that extend what the user typed
            new_suggest_vals = [sugg for sugg in suggest_values
                                if sugg.startswith(searchby)]
            response['autosuggest'] = new_suggest_vals
    if request_type == 'autocomplete':
        # call the values function with the correct kb_name
        if CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS.has_key(fulltag):
            kbname = CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS[fulltag]
            # check if the searchby field has semicolons. Take all
            # the semicolon-separated items..
            # BUGFIX: the previous test "searchby.rfind(';')" was wrong:
            # rfind() returns -1 (truthy) when ';' is absent, so the else
            # branch was effectively unreachable. Test membership instead.
            items = []
            vals = []
            if searchby:
                if ';' in searchby:
                    items = searchby.split(';')
                else:
                    items = [searchby.strip()]
            for item in items:
                item = item.strip()
                kbrvals = get_kbr_values(kbname, item, '', 'e') #we want an exact match
                if kbrvals and kbrvals[0]: #add the found val into vals
                    vals.append(kbrvals[0])
            # check that the values are not already contained in other
            # instances of this field
            record = get_cache_file_contents(recid, uid)[2]
            xml_rec = wash_for_xml(print_rec(record))
            record, status_code, dummy_errors = create_record(xml_rec)
            existing_values = []
            if (status_code != 0):
                existing_values = record_get_field_values(record,
                                                          maintag,
                                                          subtag1,
                                                          subtag2,
                                                          subfieldcode)
            # get the new values.. i.e. vals not in existing.
            # BUGFIX: the previous code called new_vals.remove(val) while
            # iterating over new_vals, which skips the element following
            # every removal; build a filtered list instead.
            new_vals = [val for val in vals if val not in existing_values]
            response['autocomplete'] = new_vals
    response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['autosuggestion_scanned']
    return response
def perform_request_bibcatalog(request_type, recid, uid):
    """Handle request to BibCatalog (RT).

    @param request_type: currently only 'getTickets' is handled
    @param recid: identifier of the record the tickets relate to
    @param uid: identifier of the user asking for the tickets
    @return: response dictionary; 'tickets' holds an HTML snippet listing
        the record's 'new'/'open' RT tickets (or an HTML comment when no
        ticket system is available)
    """
    response = {}
    if request_type == 'getTickets':
        # Insert the ticket data in the response, if possible
        if bibcatalog_system is None:
            response['tickets'] = "<!--No ticket system configured-->"
        elif bibcatalog_system and uid:
            bibcat_resp = bibcatalog_system.check_system(uid)
            if bibcat_resp == "":
                tickets_found = bibcatalog_system.ticket_search(uid, \
                    status=['new', 'open'], recordid=recid)
                t_url_str = '' #put ticket urls here, formatted for HTML display
                for t_id in tickets_found:
                    ticket_info = bibcatalog_system.ticket_get_info( \
                        uid, t_id, ['url_display', 'url_close'])
                    t_url = ticket_info['url_display']
                    t_close_url = ticket_info['url_close']
                    #format..
                    t_url_str += "#" + str(t_id) + '<a href="' + t_url + \
                        '">[read]</a> <a href="' + t_close_url + \
                        '">[close]</a><br/>'
                #put ticket header and tickets links in the box.
                # BUGFIX: the href attribute of the "new ticket" link was
                # missing its closing double quote, which produced broken
                # HTML markup.
                t_url_str = "<strong>Tickets</strong><br/>" + t_url_str + \
                    "<br/>" + '<a href="new_ticket?recid=' + str(recid) + \
                    '">[new ticket]</a>'
                response['tickets'] = t_url_str
                #add a new ticket link
            else:
                #put something in the tickets container, for debug
                response['tickets'] = "<!--" + bibcat_resp + "-->"
    response['resultCode'] = 31
    return response
def _add_curated_references_to_record(recid, uid, bibrec):
    """
    Adds references from the cache that have been curated (contain $$9CURATOR)
    to the bibrecord object

    @param recid: record id, used to retrieve cache
    @param uid: id of the current user, used to retrieve cache
    @param bibrec: bibrecord object to add references to
    """
    cached_record = get_cache_file_contents(recid, uid)[2]
    for ref_field in record_get_field_instances(cached_record, "999", "C", "5"):
        for sf in ref_field[0]:
            if sf[0] == '9' and sf[1] == 'CURATOR':
                # Curated reference: push it on top of the references in
                # bibrec, dropping its $$o subfields first
                kept_subfields = [subf for subf in ref_field[0]
                                  if subf[0] != 'o']
                ref_field = (kept_subfields, ref_field[1], ref_field[2],
                             ref_field[3], ref_field[4])
                record_add_fields(bibrec, '999', [ref_field],
                                  field_position_local=0)
def _xml_to_textmarc_references(bibrec):
    """
    Convert XML record to textmarc and return the lines related to references

    @param bibrec: bibrecord object to be converted
    @return: textmarc lines with references
    @rtype: string
    """
    conversion_options = {"aleph-marc": 0, "correct-mode": 1, "append-mode": 0,
                          "delete-mode": 0, "insert-mode": 0,
                          "replace-mode": 0, "text-marc": 1}
    # create_marc_record() modifies the record it receives, so pass a deep copy
    marc_text = xmlmarc2textmarc.create_marc_record(copy.deepcopy(bibrec),
                                                    "", conversion_options)
    # keep only the 999C5 (reference) lines, stripped of whitespace
    reference_lines = []
    for marc_line in marc_text.split('\n'):
        if '999C5' in marc_line:
            reference_lines.append(marc_line.strip())
    return reference_lines
def perform_request_ref_extract_url(recid, uid, url):
    """
    Making use of the refextractor API, extract references from the url
    received from the client

    @param recid: opened record id
    @param uid: active user id
    @param url: URL to extract references from
    @return response to be returned to the client code; on success it holds
        'ref_bibrecord', 'ref_xmlrecord' and 'ref_textmarc', otherwise
        'ref_xmlrecord' is False and 'ref_msg' explains the failure
    """
    response = {}
    try:
        recordExtended = replace_references(recid, uid, url=url)
    except FullTextNotAvailable:
        response['ref_xmlrecord'] = False
        response['ref_msg'] = "File not found. Server returned code 404"
        return response
    except Exception:
        # BUGFIX: was a bare "except:"; narrowed to Exception so that
        # SystemExit/KeyboardInterrupt are no longer swallowed here.
        response['ref_xmlrecord'] = False
        response['ref_msg'] = """Error while fetching PDF. Bad URL or file could
        not be retrieved """
        return response
    if not recordExtended:
        response['ref_msg'] = """No references were found in the given PDF """
        return response
    ref_bibrecord = create_record(recordExtended)[0]
    # keep the curator-edited references ($$9CURATOR) from the cache on top
    _add_curated_references_to_record(recid, uid, ref_bibrecord)
    response['ref_bibrecord'] = ref_bibrecord
    response['ref_xmlrecord'] = record_xml_output(ref_bibrecord)
    textmarc_references = _xml_to_textmarc_references(ref_bibrecord)
    response['ref_textmarc'] = '<div class="refextracted">' + \
        '<br />'.join(textmarc_references) + "</div>"
    return response
def perform_request_ref_extract(recid, uid, txt=None):
    """ Handle request to extract references in the given record
    @param recid: record id from which the references should be extracted
    @type recid: str
    @param txt: string containing references
    @type txt: str
    @param uid: user id
    @type uid: int
    @return: xml record with references extracted
    @rtype: dictionary
    """
    # User-facing fallback messages shown when extraction yields nothing.
    text_no_references_found_msg = """ No references extracted. The automatic
            extraction did not recognize any reference in the
            pasted text.<br /><br />If you want to add the references
            manually, an easily recognizable format is:<br/><br/>
            &nbsp;&nbsp;&nbsp;&nbsp;[1] Phys. Rev A71 (2005) 42<br />
            &nbsp;&nbsp;&nbsp;&nbsp;[2] ATLAS-CMS-2007-333
    """
    pdf_no_references_found_msg = """ No references were found in the attached
            PDF.
    """
    response = {}
    response['ref_xmlrecord'] = False
    recordExtended = None
    try:
        if txt:
            # References pasted as text by the user: decode and run the
            # extractor over the supplied snippet.
            recordExtended = replace_references(recid, uid,
                                                txt=txt.decode('utf-8'))
            if not recordExtended:
                response['ref_msg'] = text_no_references_found_msg
        else:
            # No text supplied: extract from the record's attached fulltext.
            recordExtended = replace_references(recid, uid)
            if not recordExtended:
                response['ref_msg'] = pdf_no_references_found_msg
    except FullTextNotAvailable:
        response['ref_msg'] = """ The fulltext is not available.
        """
    except:
        # NOTE(review): deliberately best-effort — any extraction failure is
        # reported back to the client instead of propagating.
        response['ref_msg'] = """ An error ocurred while extracting references.
        """
    if not recordExtended:
        # nothing extracted: response only carries the explanatory message
        return response
    ref_bibrecord = create_record(recordExtended)[0]
    # keep curator-edited references ($$9CURATOR) from the cache on top
    _add_curated_references_to_record(recid, uid, ref_bibrecord)
    response['ref_bibrecord'] = ref_bibrecord
    response['ref_xmlrecord'] = record_xml_output(ref_bibrecord)
    textmarc_references = _xml_to_textmarc_references(ref_bibrecord)
    response['ref_textmarc'] = '<div class="refextracted">' + '<br />'.join(textmarc_references) + "</div>"
    return response
def perform_request_preview_record(request_type, recid, uid, data):
    """ Handle request to preview record with formatting

    @param request_type: only "preview" triggers any work
    @param recid: id of the record to preview
    @param uid: id of the requesting user, used to locate their edit cache
    @param data: request payload; reads 'submitMode', 'textmarc' and
        'new_window'
    @return: response dictionary carrying 'html_preview' on success, or the
        textmarc conversion status on parsing errors
    """
    response = {}
    if request_type == "preview":
        # Pick the record source: pasted textmarc, the user's cache, or the DB.
        if data["submitMode"] == "textmarc":
            textmarc_record = data['textmarc']
            xml_conversion_status = get_xml_from_textmarc(recid, textmarc_record)
            if xml_conversion_status['resultMsg'] == 'textmarc_parsing_error':
                response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['textmarc_parsing_error']
                response.update(xml_conversion_status)
                return response
            record = create_record(xml_conversion_status["resultXML"])[0]
        elif cache_exists(recid, uid):
            dummy1, dummy2, record, dummy3, dummy4, dummy5, dummy6 = \
                get_cache_file_contents(recid, uid)
        else:
            record = get_bibrecord(recid)
        # clean the record from unfilled volatile fields
        # BUGFIX: the strip + format sequence appeared twice verbatim; the
        # second pass re-formatted the record for nothing. Done once now.
        record_strip_empty_volatile_subfields(record)
        record_strip_empty_fields(record)
        response['html_preview'] = _get_formated_record(record, data['new_window'])
    return response
def perform_request_get_pdf_url(recid):
    """ Handle request to get the URL of the attached PDF

    @param recid: id of the record whose documents are inspected
    @return: dictionary with 'pdf_url' (empty string when no PDF is found)
    """
    response = {'pdf_url': ""}
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        try:
            candidate_url = bibdoc.get_file('pdf').get_url()
        except InvenioBibDocFileError:
            # this document has no 'pdf' file attached; try the next one
            continue
        if candidate_url:
            # first document exposing a PDF wins
            response['pdf_url'] = candidate_url
            break
    return response
def perform_request_get_textmarc(recid, uid):
    """ Get record content from cache, convert it to textmarc and return it

    @param recid: id of the record held in the cache
    @param uid: id of the user owning the cache
    @return: dictionary with the converted record under 'textmarc'
    """
    conversion_options = {"aleph-marc": 0, "correct-mode": 1, "append-mode": 0,
                          "delete-mode": 0, "insert-mode": 0,
                          "replace-mode": 0, "text-marc": 1}
    cached_record = get_cache_file_contents(recid, uid)[2]
    # drop empty fields and controlfields so the textmarc output stays clean
    record_strip_empty_fields(cached_record)
    record_strip_controlfields(cached_record)
    # create_marc_record() mutates the record it receives, hence the deepcopy
    converted = xmlmarc2textmarc.create_marc_record(
        copy.deepcopy(cached_record), sysno="", options=conversion_options)
    return {'textmarc': converted}
def perform_request_get_tableview(recid, uid, data):
    """ Convert textmarc inputed by user to marcxml and if there are no
        parsing errors, create cache file

    @param recid: id of the record being edited
    @param uid: id of the editing user
    @param data: request payload; reads 'textmarc' and 'recordDirty'
    @return: the conversion status dictionary plus a 'resultCode'
    """
    conversion_status = get_xml_from_textmarc(recid, data['textmarc'])
    response = dict(conversion_status)
    if conversion_status['resultMsg'] == 'textmarc_parsing_error':
        response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['textmarc_parsing_error']
    else:
        # parsing succeeded: persist the parsed record into the user's cache
        create_cache_file(recid, uid,
                          create_record(conversion_status['resultXML'])[0],
                          data['recordDirty'])
        response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['tableview_change_success']
    return response
def _get_formated_record(record, new_window):
    """Returns a record in a given format

    @param record: BibRecord object
    @param new_window: Boolean, indicates if it is needed to add all the headers
    to the page (used when clicking Preview button)
    @return: HTML string with brief, detailed and reference previews of the
        record (a full standalone page when new_window is True)
    """
    # local import to avoid a config dependency at module import time
    from invenio.config import CFG_WEBSTYLE_TEMPLATE_SKIN
    xml_record = wash_for_xml(record_xml_output(record))
    result = ''
    if new_window:
        # standalone page: emit <html>/<head> with jQuery and the site CSS
        result = """ <html><head><title>Record preview</title>
        <script type="text/javascript" src="%(site_url)s/js/jquery.min.js"></script>
        <link rel="stylesheet" href="%(site_url)s/img/invenio%(cssskin)s.css" type="text/css"></head>
        """ % {'site_url': CFG_SITE_URL,
               'cssskin': CFG_WEBSTYLE_TEMPLATE_SKIN != 'default' and '_' + CFG_WEBSTYLE_TEMPLATE_SKIN or ''
               }
        result += get_mathjax_header(True) + '<body>'
    result += "<h2> Brief format preview </h2><br />"
    result += bibformat.format_record(recID=None,
                                      of="hb",
                                      xml_record=xml_record) + "<br />"
    result += "<br /><h2> Detailed format preview </h2><br />"
    result += bibformat.format_record(recID=None,
                                      of="hd",
                                      xml_record=xml_record)
    #Preview references
    result += "<br /><h2> References </h2><br />"
    result += bibformat.format_record(0,
                                      'hdref',
                                      xml_record=xml_record)
    # hide the reference-input links that the 'hdref' format may render
    result += """<script>
        $('#referenceinp_link').hide();
        $('#referenceinp_link_span').hide();
        </script>
    """
    if new_window:
        result += "</body></html>"
    return result
########### Functions related to templates web interface #############
def perform_request_init_template_interface():
    """Handle a request to manage templates

    @return: (body, errors, warnings) where body is the HTML/JS markup of
        the template-management interface
    """
    errors = []
    warnings = []
    # Expose the available record templates and site constants to JS.
    record_templates = get_record_templates()
    record_templates.sort()
    js_globals = {'gRECORD_TEMPLATES': record_templates,
                  'gSITE_RECORD': '"' + CFG_SITE_RECORD + '"',
                  'gSITE_URL': '"' + CFG_SITE_URL + '"'}
    chunks = ['<script type="text/javascript">\n']
    for var_name in js_globals:
        chunks.append(' var %s = %s;\n' % (var_name, js_globals[var_name]))
    chunks.append(' </script>\n')
    # Add scripts (the order in which they are loaded matters).
    script_names = ['jquery-ui.min.js',
                    'json2.js', 'bibedit_display.js',
                    'bibedit_template_interface.js']
    for script_name in script_names:
        chunks.append(' <script type="text/javascript" src="%s/js/%s">'
                      '</script>\n' % (CFG_SITE_URL, script_name))
    chunks.append(' <div id="bibEditTemplateList"></div>\n')
    chunks.append(' <div id="bibEditTemplateEdit"></div>\n')
    return ''.join(chunks), errors, warnings
def perform_request_ajax_template_interface(data):
    """Handle Ajax requests by redirecting to appropriate function."""
    response = {}
    if data['requestType'] == 'editTemplate':
        # Edit a template request.
        response.update(perform_request_edit_template(data))
    return response
def perform_request_edit_template(data):
    """ Handle request to edit a template

    @param data: request payload; reads 'templateFilename'
    @return: dictionary with 'templateMARCXML' on success, or
        'resultCode' = 1 when the template cannot be loaded
    """
    requested_template = get_record_template(data['templateFilename'])
    if requested_template:
        return {'templateMARCXML': requested_template}
    return {'resultCode': 1}
def perform_doi_search(doi):
    """Search for DOI on the dx.doi.org page
    @param doi: the DOI string to resolve
    @return: the url returned by this page (dictionary with 'doi_url';
        empty dictionary when the resolver could not be reached)"""
    response = {}
    url = "http://dx.doi.org/"
    # the resolver expects the DOI in the 'hdl' form parameter
    val = {'hdl': doi}
    url_data = urllib.urlencode(val)
    # cookie jar + custom User-Agent so the resolver accepts the request
    cj = cookielib.CookieJar()
    header = [('User-Agent', CFG_DOI_USER_AGENT)]
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    opener.addheaders = header
    try:
        # POSTing url_data makes the resolver redirect to the DOI target
        resp = opener.open(url, url_data)
    except:
        # best-effort lookup: network/HTTP failures yield an empty response
        return response
    else:
        # geturl() reflects the final URL after redirects
        response['doi_url'] = resp.geturl()
    return response
diff --git a/invenio/legacy/bibfield/__init__.py b/invenio/legacy/bibfield/__init__.py
index 3febc0dff..7e4f0fa54 100644
--- a/invenio/legacy/bibfield/__init__.py
+++ b/invenio/legacy/bibfield/__init__.py
@@ -1,161 +1,161 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibField engine
"""
__revision__ = "$Id$"
import os
try:
import cPickle as pickle
except:
import pickle
from pprint import pformat
from werkzeug import import_string
from invenio.utils.datastructures import LaziestDict
from invenio.dbquery import run_sql
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.base.signals import record_after_update
from invenio.bibfield_jsonreader import JsonReader
from invenio.bibfield_utils import BlobWrapper, BibFieldDict
# Lazy loader of bibfield readers
def reader_discover(key):
    """Lazily import the bibfield reader registered for ``key``.

    @param key: master format name, e.g. 'marc'
    @return: the reader class (a JsonReader subclass)
    @raise KeyError: if no valid reader module exists for ``key``
    """
    try:
        candidate = import_string('invenio.bibfield_%sreader:readers' % (key, ))
        if issubclass(candidate, JsonReader):
            return candidate
    except Exception:
        # Log the import/lookup failure but still surface it to the
        # caller as a missing key.  (Was a bare 'except:', which also
        # caught SystemExit and KeyboardInterrupt.)
        register_exception()
    raise KeyError(key)
# Registry of reader classes keyed by master format name; entries are
# imported on first access via reader_discover().
CFG_BIBFIELD_READERS = LaziestDict(reader_discover)
@record_after_update.connect
def delete_record_cache(sender, recid=None, **kwargs):
    """Rebuild the cached recjson of ``recid`` after a record update."""
    get_record(recid, reset_cache=True)
def create_record(blob, master_format='marc', verbose=0, **additional_info):
    """
    Build a record object from ``blob`` using the reader registered for
    the given master format.

    @return Record object
    """
    wrapper = BlobWrapper(blob=blob, master_format=master_format,
                          **additional_info)
    reader_class = CFG_BIBFIELD_READERS[master_format]
    return reader_class(wrapper, check=True)
def create_records(blob, master_format='marc', verbose=0, **additional_info):
    """
    Build a list of records from ``blob``, using the reader's split_blob
    helper to separate the individual record blobs.

    @see create_record()
    @return List of record objects created by create_record()
    """
    reader_class = CFG_BIBFIELD_READERS[master_format]
    record_blobs = reader_class.split_blob(blob,
                                           additional_info.get('schema', None))
    return [create_record(single_blob, master_format, verbose=verbose,
                          **additional_info)
            for single_blob in record_blobs]
def get_record(recid, reset_cache=False, fields=()):
    """
    Record factory: retrieves the record from the 'recjson' cache in the
    bibfmt table if present; otherwise (or when reset_cache is True)
    rebuilds it with the appropriate reader and re-caches it.

    @param recid: identifier of the record to load
    @param reset_cache: if True, ignore and refresh the cached version
    @param fields: optional iterable of field names; when given, only
        those fields are copied into the returned BibFieldDict
    @return: Bibfield object representing the record or None if the recid is not
        present in the system
    """
    record = None
    # Try the cached recjson serialization first.
    if not reset_cache:
        res = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND format='recjson'",
                      (recid,))
        if res:
            record = JsonReader(BlobWrapper(pickle.loads(res[0][0])))
    # There is no version cached or we want to renew it:
    # retrieve the master format and blob, then rebuild the record.
    if not record or reset_cache:
        blob_wrapper = _build_wrapper(recid)
        if not blob_wrapper:
            return None
        record = CFG_BIBFIELD_READERS[blob_wrapper.master_format](blob_wrapper)
        # Update the bibfmt cache for future uses.
        run_sql("REPLACE INTO bibfmt(id_bibrec, format, last_updated, value) VALUES (%s, 'recjson', NOW(), %s)",
                (recid, pickle.dumps((record.rec_json))))
    if fields:
        # Project only the requested fields into a fresh dict-like object.
        chunk = BibFieldDict()
        for key in fields:
            chunk[key] = record.get(key)
        record = chunk
    return record
def guess_legacy_field_names(fields, master_format='marc'):
    """
    Using the legacy rules written in the config file (@legacy) tries to find
    the equivalent json field for one or more legacy fields.

    >>> guess_legacy_field_names(('100__a', '245'), 'marc')
    {'100__a':['authors[0].full_name'], '245':['title']}

    @param fields: one legacy field name or an iterable of them
    @param master_format: legacy format the field names belong to
    @return: dict mapping each legacy field to a (possibly empty) list of
        equivalent json fields
    """
    from invenio.core.record.definitions import legacy_field_matchings
    res = {}
    if isinstance(fields, basestring):
        fields = (fields, )
    for field in fields:
        try:
            # BUGFIX: the original referenced the undefined name
            # 'legacy_rules', so the bare except always returned [];
            # the imported mapping is 'legacy_field_matchings'.
            res[field] = legacy_field_matchings[master_format].get(field, [])
        except (KeyError, TypeError):
            # Unknown master format (or unexpected mapping shape).
            res[field] = []
    return res
def _build_wrapper(recid):
    """Build the BlobWrapper for ``recid``.

    #TODO: update to look inside mongoDB for the parameters and the blob.
    Currently only the marc/recstruct pair is supported.

    @param recid: identifier of the record
    @return: BlobWrapper instance, or None when the record does not exist
        or uses an unsupported master format
    """
    try:
        master_format = run_sql("SELECT master_format FROM bibrec WHERE id=%s",
                                (recid,))[0][0]
    except Exception:
        # No row for this recid (IndexError) or a DB failure: the caller
        # treats None as "record not available".  (Was a bare 'except:'.)
        return None
    if master_format != 'marc':
        return None
    from invenio.search_engine import get_record as se_get_record
    blob = se_get_record(recid)
    return BlobWrapper(blob, master_format=master_format, schema='recstruct')
diff --git a/invenio/legacy/bibformat/adminlib.py b/invenio/legacy/bibformat/adminlib.py
index bd92e5478..156f4ed4f 100644
--- a/invenio/legacy/bibformat/adminlib.py
+++ b/invenio/legacy/bibformat/adminlib.py
@@ -1,1600 +1,1600 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Handle requests from the web interface to configure BibFormat.
"""
__revision__ = "$Id$"
import os
import re
import stat
import time
import cgi
from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, CFG_ETCDIR
from invenio.modules.formatter.config import \
CFG_BIBFORMAT_TEMPLATES_PATH, \
CFG_BIBFORMAT_OUTPUTS_PATH, \
CFG_BIBFORMAT_ELEMENTS_PATH, \
CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, \
InvenioBibFormatError
from invenio.utils.url import wash_url_argument
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.base.i18n import gettext_set_language, wash_language, language_list_long
from invenio.search_engine import perform_request_search
import invenio.modules.formatter.api as bibformat_dblayer
from invenio.modules.formatter import engine as bibformat_engine
from invenio.utils.text import encode_for_xml
import invenio.template
# Template-layer renderer used by all the admin pages below.
bibformat_templates = invenio.template.load('bibformat')
def getnavtrail(previous='', ln=CFG_SITE_LANG):
    """
    Build the navigation-trail markup for the BibFormat admin pages.

    @param previous: suffix appended after the standard trail
    @param ln: language
    @return: HTML markup of the navigation trail
    """
    previous = wash_url_argument(previous, 'str')
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    trail = '''<a class="navtrail" href="%s/help/admin">%s</a> &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py?ln=%s">%s</a> ''' % \
            (CFG_SITE_URL, _("Admin Area"), CFG_SITE_URL, ln, _("BibFormat Admin"))
    return trail + previous
def perform_request_index(ln=CFG_SITE_LANG, warnings=None, is_admin=False):
    """
    Returns the main BibFormat admin page.

    @param ln: language
    @param warnings: warnings to show on the page, passed through to the
        template (None for no warnings)
    @param is_admin: indicate if user is authorized to use BibFormat
    @return: the main admin page
    """
    return bibformat_templates.tmpl_admin_index(ln, warnings, is_admin)
def perform_request_format_templates_management(ln=CFG_SITE_LANG, checking=0):
    """
    Returns the main management console for format templates

    @param ln: language
    @param checking: the level of checking (0: basic, 1:extensive (time consuming))
    @return: the main page for format templates management
    """
    # Reload in case a format was changed
    bibformat_engine.clear_caches()

    # Get formats lists of attributes
    formats = bibformat_engine.get_format_templates(with_attributes=True)
    formats_attrs = []
    for filename in formats:
        attrs = formats[filename]['attrs']
        attrs['filename'] = filename
        if filename.endswith('.xsl'):
            attrs['name'] += ' (XSL)'
        attrs['editable'] = can_write_format_template(filename)
        path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename
        try:
            attrs['last_mod_date'] = time.ctime(os.stat(path)[stat.ST_MTIME])
        except OSError:
            # File does not exist. Happens with temporary files
            # created by editors.
            continue

        status = check_format_template(filename, checking)
        # BUGFIX: removed an unused 'import string' that was executed on
        # every loop iteration.
        # Show "Not OK" unless the only reported problem is an unreadable
        # template (already conveyed through 'editable').
        if len(status) > 1 or (len(status) == 1 and status[0].find('Could not read format template named') == -1):
            status = '''
            <a style="color: rgb(255, 0, 0);"
            href="%(siteurl)s/admin/bibformat/bibformatadmin.py/validate_format?ln=%(ln)s&amp;bft=%(bft)s">Not OK</a>
            ''' % {'siteurl': CFG_SITE_URL,
                   'ln': ln,
                   'bft': filename}
        else:
            status = '<span style="color: rgb(0, 255, 0);">OK</span>'
        attrs['status'] = status
        formats_attrs.append(attrs)

    def sort_by_attr(seq):
        """
        Sort 'seq' by attribute name.

        @param seq: a list of dictionaries, containing each one key named 'name'
        """
        intermed = [(x['name'].lower(), i, x) for i, x in enumerate(seq)]
        intermed.sort()
        return [x[-1] for x in intermed]

    sorted_format_templates = sort_by_attr(formats_attrs)
    return bibformat_templates.tmpl_admin_format_templates_management(ln, sorted_format_templates)
def perform_request_format_template_show(bft, ln=CFG_SITE_LANG, code=None,
                                         ln_for_preview=CFG_SITE_LANG, pattern_for_preview="",
                                         content_type_for_preview="text/html"):
    """
    Returns the editor for format templates.

    @param ln: language
    @param bft: the template to edit
    @param code: the code being edited (None to load it from the template file)
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: content-type to use to serve preview
    @return: the main page for formats management
    """
    format_template = bibformat_engine.get_format_template(filename=bft, with_attributes=True)

    # Either use code being edited, or the original code inside template.
    # File content is HTML-escaped so it can be shown inside the editor.
    if code is None:
        code = cgi.escape(format_template['code'])

    # Build a default pattern if it is empty: propose the first record
    # returned by an empty search so the preview has something to render.
    if pattern_for_preview == "":
        recIDs = perform_request_search()
        if len(recIDs) > 0:
            recID = recIDs[0]
            pattern_for_preview = "recid:%s" % recID

    editable = can_write_format_template(bft)

    # Look for all existing content_types
    content_types = bibformat_dblayer.get_existing_content_types()

    # Add some standard content types if not already there
    standard_content_types = ['text/xml', 'application/rss+xml', 'text/plain', 'text/html']
    content_types.extend([content_type for content_type in standard_content_types
                          if content_type not in content_types])

    return bibformat_templates.tmpl_admin_format_template_show(ln, format_template['attrs']['name'],
                                                               format_template['attrs']['description'],
                                                               code, bft,
                                                               ln_for_preview=ln_for_preview,
                                                               pattern_for_preview=pattern_for_preview,
                                                               editable=editable,
                                                               content_type_for_preview=content_type_for_preview,
                                                               content_types=content_types)
def perform_request_format_template_show_dependencies(bft, ln=CFG_SITE_LANG):
    """
    Show the dependencies (on elements) of the given format.

    @param ln: language
    @param bft: the filename of the template to show
    @return: HTML markup
    """
    format_template = bibformat_engine.get_format_template(filename=bft,
                                                           with_attributes=True)
    name = format_template['attrs']['name']
    output_formats = get_outputs_that_use_template(bft)
    format_elements = get_elements_used_by_template(bft)

    # Collect every tag referenced by the output formats and elements.
    tags = []
    for output_format in output_formats:
        tags.extend(output_format['tags'])
    for format_element in format_elements:
        tags.extend(format_element['tags'])
    tags.sort()

    return bibformat_templates.tmpl_admin_format_template_show_dependencies(
        ln, name, bft, output_formats, format_elements, tags)
def perform_request_format_template_show_attributes(bft, ln=CFG_SITE_LANG, new=False):
    """
    Page for template name and description attributes edition.
    If format template is new, offer the possibility to
    make a duplicate of an existing format template.

    @param ln: language
    @param bft: the template to edit
    @param new: if True, the template has just been added (is new)
    @return: the main page for format templates attributes edition
    """
    all_templates = []
    if new:
        all_templates_attrs = bibformat_engine.get_format_templates(with_attributes=True)
        # Sanity check. Should always be true at this stage.
        # (Idiom fix: 'in' instead of the deprecated dict.has_key().)
        if bft in all_templates_attrs:
            del all_templates_attrs[bft]  # Remove in order not to make a duplicate of self..

        # Sort according to name, inspired from Python Cookbook
        def sort_by_name(seq, keys):
            """
            Sort the sequence 'seq' by 'keys'
            """
            intermed = [(x['attrs']['name'], keys[i], i, x) for i, x in enumerate(seq)]
            intermed.sort()
            return [(x[1], x[0]) for x in intermed]

        all_templates = sort_by_name(all_templates_attrs.values(), all_templates_attrs.keys())

    format_template = bibformat_engine.get_format_template(filename=bft, with_attributes=True)
    name = format_template['attrs']['name']
    description = format_template['attrs']['description']
    editable = can_write_format_template(bft)

    return bibformat_templates.tmpl_admin_format_template_show_attributes(ln,
                                                                          name,
                                                                          description,
                                                                          bft,
                                                                          editable,
                                                                          all_templates,
                                                                          new)
def perform_request_format_template_show_short_doc(ln=CFG_SITE_LANG, search_doc_pattern=""):
    """
    Returns the format elements documentation to be included inside format templated editor.
    Keep only elements that have 'search_doc_pattern' text inside description,
    if pattern not empty

    @param ln: language
    @param search_doc_pattern: a search pattern that specified which elements to display
    @return: a brief version of the format element documentation
    """
    # Get format elements, ordered by name.
    all_elements = bibformat_engine.get_format_elements(with_built_in_params=True)
    ordered_elements = [all_elements[key] for key in sorted(all_elements.keys())]

    def filter_elem(element):
        """Keep element if is string representation contains all keywords of search_doc_pattern,
        and if its name does not start with a number (to remove 'garbage' from elements in tags table)"""
        if element['type'] != 'python' and \
           element['attrs']['name'][0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
            return False
        text = str(element).upper()  # Basic text representation
        if search_doc_pattern != "":
            for word in search_doc_pattern.split():
                if word.upper() != "AND" and text.find(word.upper()) == -1:
                    return False
        return True

    matching_elements = [element for element in ordered_elements
                         if filter_elem(element)]
    return bibformat_templates.tmpl_admin_format_template_show_short_doc(ln, matching_elements)
def perform_request_format_elements_documentation(ln=CFG_SITE_LANG):
    """
    Returns the main management console for format elements.
    Includes list of format elements and associated administration tools.

    @param ln: language
    @return: the main page for format elements management
    """
    # Get format elements, ordered by name.
    all_elements = bibformat_engine.get_format_elements(with_built_in_params=True)
    ordered_elements = [all_elements[key] for key in sorted(all_elements.keys())]

    # Keep only real (python) elements whose name does not begin with a
    # digit (drops 'garbage' coming from the tags table).
    digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    filtered_elements = [element for element in ordered_elements
                         if element is not None
                         and element['type'] == 'python'
                         and element['attrs']['name'][0] not in digits]

    return bibformat_templates.tmpl_admin_format_elements_documentation(ln, filtered_elements)
def perform_request_format_element_show_dependencies(bfe, ln=CFG_SITE_LANG):
    """
    Show the dependencies of the given format element.

    @param ln: language
    @param bfe: the filename of the format element to show
    @return: HTML markup of elements dependencies page
    """
    # Templates that call this element, and the tags it uses.
    format_templates = get_templates_that_use_element(bfe)
    tags = get_tags_used_by_element(bfe)
    return bibformat_templates.tmpl_admin_format_element_show_dependencies(ln,
                                                                           bfe,
                                                                           format_templates,
                                                                           tags)
def perform_request_format_element_test(bfe, ln=CFG_SITE_LANG, param_values=None, user_info=None):
    """
    Show a test page for the given format element.

    'param_values' is the list of values to pass to 'format'
    function of the element as parameters, in the order ...
    If params is None, this means that they have not be defined by user yet.

    @param ln: language
    @param bfe: the name of the format element to show
    @param param_values: the list of parameters to pass to element format function
    @param user_info: the user_info of this request
    @return: HTML markup of elements test page
    """
    _ = gettext_set_language(ln)
    format_element = bibformat_engine.get_format_element(bfe, with_built_in_params=True)

    # Load parameter names and description
    ##
    param_names = []
    param_descriptions = []

    # First value is a search pattern to choose the record
    param_names.append(_("Test with record:")) # Caution: keep in sync with same text below
    param_descriptions.append(_("Enter a search query here."))

    # Parameters defined in this element
    for param in format_element['attrs']['params']:
        param_names.append(param['name'])
        param_descriptions.append(param['description'])

    # Parameters common to all elements of a kind
    for param in format_element['attrs']['builtin_params']:
        param_names.append(param['name'])
        param_descriptions.append(param['description'])

    # Load parameters values
    ##
    if param_values is None: #First time the page is loaded
        param_values = []

        # Propose an existing record id by default
        recIDs = perform_request_search()
        if len(recIDs) > 0:
            recID = recIDs[0]
            param_values.append("recid:%s" % recID)

        # Default values defined in this element
        for param in format_element['attrs']['params']:
            param_values.append(param['default'])

        # Parameters common to all elements of a kind
        for param in format_element['attrs']['builtin_params']:
            param_values.append(param['default'])

    # Execute element with parameters
    ##
    params = dict(zip(param_names, param_values))

    # Find a record corresponding to search pattern
    search_pattern = params[_("Test with record:")] # Caution keep in sync with same text above and below
    recIDs = perform_request_search(p=search_pattern)
    del params[_("Test with record:")] # Caution keep in sync with same text above

    if len(recIDs) > 0:
        bfo = bibformat_engine.BibFormatObject(recID = recIDs[0],
                                               ln = ln,
                                               search_pattern = search_pattern.split(' '),
                                               xml_record = None,
                                               user_info = user_info)
        (result, dummy) = bibformat_engine.eval_format_element(format_element, bfo, params)
    else:
        # No matching record: log and show the error message as result.
        try:
            raise InvenioBibFormatError(_('No Record Found for %s.') % search_pattern)
        except InvenioBibFormatError, exc:
            register_exception()
            result = exc.message
    return bibformat_templates.tmpl_admin_format_element_test(ln,
                                                              bfe,
                                                              format_element['attrs']['description'],
                                                              param_names,
                                                              param_values,
                                                              param_descriptions,
                                                              result)
def perform_request_output_formats_management(ln=CFG_SITE_LANG, sortby="code"):
    """
    Returns the main management console for output formats.
    Includes list of output formats and associated administration tools.

    @param ln: language
    @param sortby: the sorting criteria (can be 'code' or 'name')
    @return: the main page for output formats management
    """
    # Reload in case a format was changed
    bibformat_engine.clear_caches()

    # Get output formats lists of attributes
    output_formats_list = bibformat_engine.get_output_formats(with_attributes=True)
    output_formats = {}
    for filename in output_formats_list:
        output_format = output_formats_list[filename]
        code = output_format['attrs']['code']
        path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename
        output_format['editable'] = can_write_output_format(code)
        try:
            output_format['last_mod_date'] = time.ctime(os.stat(path)[stat.ST_MTIME])
        except OSError:
            # File does not exist. Happens with temporary files
            # created by editors.
            continue
        # Validate the output format
        status = check_output_format(code)
        # If there is an error but the error is just 'format is not writable', do not display as error
        if len(status) > 1 or (len(status)==1 and status[0].find('BibFormat could not write to output format') == -1):
            status = '''
            <a style="color: rgb(255, 0, 0);"
            href="%(siteurl)s/admin/bibformat/bibformatadmin.py/validate_format?ln=%(ln)s&bfo=%(bfo)s">Not OK</a>
            ''' % {'siteurl':CFG_SITE_URL,
                   'ln':ln,
                   'bfo':code}
        else:
            status = '<span style="color: rgb(0, 255, 0);">OK</span>'
        output_format['status'] = status
        output_formats[filename] = output_format

    # Sort according to code or name, inspired from Python Cookbook
    def get_attr(dic, attr):
        """
        Returns the value given by 'attr' in the dictionary 'dic', representing
        an output format attributes.
        If attr is equal to 'code', returns the code attribute of the dictionary.
        Else returns the generic name

        @param dic: a dictionary of the attribute of an output format, as returned by bibformat_engine.get_output_format
        @param attr: attribute we want to fetch. Either 'code' or any other string
        """
        if attr == "code":
            return dic['attrs']['code']
        else:
            return dic['attrs']['names']['generic']

    def sort_by_attr(seq, attr):
        """
        Sort dictionaries given in 'seq' according to parameter 'attr'
        """
        intermed = [ (get_attr(x, attr), i, x) for i, x in enumerate(seq)]
        intermed.sort()
        return [x[-1] for x in intermed]

    # Fall back to sorting by code on unexpected values.
    if sortby != "code" and sortby != "name":
        sortby = "code"

    sorted_output_formats = sort_by_attr(output_formats.values(), sortby)

    return bibformat_templates.tmpl_admin_output_formats_management(ln, sorted_output_formats)
def perform_request_output_format_show(bfo, ln=CFG_SITE_LANG, r_fld=[], r_val=[], r_tpl=[], default="", r_upd="", args={}):
    """
    Returns the editing tools for a given output format.

    The page either shows the output format from file, or from user's
    POST session, as we want to let him edit the rules without
    saving. Policy is: r_fld, r_val, rules_tpl are list of attributes
    of the rules. If they are empty, load from file. Else use
    POST. The i th value of each list is one of the attributes of rule
    i. Rule i is the i th rule in order of evaluation. All list have
    the same number of item.

    r_upd contains an action that has to be performed on rules. It
    can composed of a number (i, the rule we want to modify) and an
    operator : "save" to save the rules, "add" or "del".
    syntax: operator [number]
    For eg: r_upd = _("Save Changes") saves all rules (no int should be specified).
    For eg: r_upd = _("Add New Rule") adds a rule (no int should be specified).
    For eg: r_upd = _("Remove Rule") + " 5" deletes rule at position 5.
    The number is used only for operation delete.

    An action can also be in **args. We must look there for string starting
    with '(+|-) [number]' to increase (+) or decrease (-) a rule given by its
    index (number).
    For example "+ 5" increase priority of rule 5 (put it at fourth position).
    The string in **args can be followed by some garbage that looks like .x
    or .y, as this is returned as the coordinate of the click on the
    <input type="image">. We HAVE to use args and reason on its keys, because for <input> of
    type image, iexplorer does not return the value of the tag, but only the name.

    Action is executed only if we are working from user's POST session
    (means we must have loaded the output format first, which is
    totally normal and expected behaviour)

    IMPORTANT: we display rules evaluation index starting at 1 in
    interface, but we start internally at 0

    NOTE(review): the mutable defaults (r_fld=[], args={}) are never
    mutated in place here, so they are harmless, but a None sentinel
    would be safer against future edits.

    @param ln: language
    @param bfo: the filename of the output format to show
    @param r_fld: the list of 'field' attribute for each rule
    @param r_val: the list of 'value' attribute for each rule
    @param r_tpl: the list of 'template' attribute for each rule
    @param default: the default format template used by this output format
    @param r_upd: the rule that we want to increase/decrease in order of evaluation
    @param args: additional parameters to move rules. See above
    @return: HTML markup for editing tools of a given output format.
    """
    output_format = bibformat_engine.get_output_format(bfo, with_attributes=True)
    format_templates = bibformat_engine.get_format_templates(with_attributes=True)
    name = output_format['attrs']['names']['generic']
    rules = []
    debug = ""
    if len(r_fld) == 0 and r_upd=="":
        # Retrieve rules from file
        rules = output_format['rules']
        default = output_format['default']
    else:
        # Retrieve rules from given lists
        # Transform a single rule (not considered as a list with length
        # 1 by the templating system) into a list
        if not isinstance(r_fld, list):
            r_fld = [r_fld]
            r_val = [r_val]
            r_tpl = [r_tpl]
        for i in range(len(r_fld)):
            rule = {'field': r_fld[i],
                    'value': r_val[i],
                    'template': r_tpl[i]}
            rules.append(rule)
        # Execute action
        _ = gettext_set_language(ln)
        if r_upd.startswith(_("Remove Rule")):
            # Remove rule (displayed indices start at 1, internal at 0)
            index = int(r_upd.split(" ")[-1]) -1
            del rules[index]
        elif r_upd.startswith(_("Save Changes")):
            # Save
            update_output_format_rules(bfo, rules, default)
        elif r_upd.startswith(_("Add New Rule")):
            # Add new rule
            rule = {'field': "",
                    'value': "",
                    'template': ""}
            rules.append(rule)
        else:
            # Get the action in 'args'
            # The action must be constructed from string of the kind:
            # + 5 or - 4 or + 5.x or -4.y
            for button_val in args.keys():#for all elements of form not handled yet
                action = button_val.split(" ")
                if action[0] == '-' or action[0] == '+':
                    index = int(action[1].split(".")[0]) -1
                    if action[0] == '-':
                        # Decrease priority
                        rule = rules[index]
                        del rules[index]
                        rules.insert(index + 1, rule)
                        # debug = 'Decrease rule '+ str(index)
                        break
                    elif action[0] == '+':
                        # Increase priority
                        rule = rules[index]
                        del rules[index]
                        # NOTE(review): for index == 0 this inserts at -1,
                        # i.e. before the LAST element -- looks unintended;
                        # confirm the UI never offers '+' on the first rule.
                        rules.insert(index - 1, rule)
                        # debug = 'Increase rule ' + str(index)
                        break

    editable = can_write_output_format(bfo)

    return bibformat_templates.tmpl_admin_output_format_show(ln,
                                                             bfo,
                                                             name,
                                                             rules,
                                                             default,
                                                             format_templates,
                                                             editable)
def perform_request_output_format_show_dependencies(bfo, ln=CFG_SITE_LANG):
    """
    Show the dependencies of the given output format.

    @param ln: language
    @param bfo: the filename of the output format to show
    @return: HTML markup of the output format dependencies pages
    """
    output_format = bibformat_engine.get_output_format(code=bfo,
                                                       with_attributes=True)
    generic_name = output_format['attrs']['names']['generic']
    used_templates = get_templates_used_by_output(bfo)
    return bibformat_templates.tmpl_admin_output_format_show_dependencies(
        ln, generic_name, bfo, used_templates)
def perform_request_output_format_show_attributes(bfo, ln=CFG_SITE_LANG):
    """
    Page for output format names and description attributes edition.

    @param ln: language
    @param bfo: filename of output format to edit
    @return: the main page for output format attributes edition
    """
    output_format = bibformat_engine.get_output_format(code=bfo,
                                                       with_attributes=True)
    attrs = output_format['attrs']
    name = attrs['names']['generic']
    description = attrs['description']
    content_type = attrs['content_type']
    visible = attrs['visibility']

    # Get translated names. Limit to long names now.
    # Translations are given in order of languages in language_list_long()
    names_trans = [{'lang': lang[1],
                    'trans': attrs['names']['ln'].get(lang[0], "")}
                   for lang in language_list_long()]

    editable = can_write_output_format(bfo)

    return bibformat_templates.tmpl_admin_output_format_show_attributes(ln,
                                                                        name,
                                                                        description,
                                                                        content_type,
                                                                        bfo,
                                                                        names_trans,
                                                                        editable,
                                                                        visible)
def add_format_template():
    """
    Adds a new format template (mainly create file with unique name)

    @return: the filename of the created format
    """
    (filename, name) = bibformat_engine.get_fresh_format_template_filename("Untitled")

    out = ""
    if not filename.endswith(".xsl"):
        out = '<name>%(name)s</name><description></description>' % {'name': name}
    path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename
    # BUGFIX: the original called 'format.close' without parentheses, so
    # the file handle was never explicitly closed (and shadowed the
    # builtin 'format').  Use a context manager instead.
    with open(path, 'w') as template_file:
        template_file.write(out)

    return filename
def delete_format_template(filename):
    """
    Remove the format template file given by ``filename`` from disk.
    Does nothing when the template is not writable.

    @param filename: the format template filename
    @return: None
    """
    if not can_write_format_template(filename):
        return
    os.remove(CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename)
    bibformat_engine.clear_caches()
def update_format_template_code(filename, code=""):
    """
    Saves code inside template given by filename

    @param filename: filename of the template to edit
    @param code: content of the template
    @return: None
    """
    format_template = bibformat_engine.get_format_template_attrs(filename)
    name = format_template['name']
    description = format_template['description']
    # Normalize Windows line endings coming from the browser.
    code = re.sub("\r\n", "\n", code)

    out = ""
    if not filename.endswith(".xsl"):
        # Only non-XSL templates carry the name/description header.
        out = """<name>%(name)s</name>
<description>%(description)s</description>
""" % {'name': name, 'description': description}
    out += code
    path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename
    # BUGFIX: 'format.close' (missing parentheses) never closed the file;
    # use a context manager.
    with open(path, 'w') as template_file:
        template_file.write(out)

    bibformat_engine.clear_caches()
def update_format_template_attributes(filename, name="", description="", duplicate=None):
    """
    Saves name and description inside template given by filename.

    The filename must change according to name, and every output format
    having reference to filename must be updated.
    If name already exist, use fresh filename (we never overwrite other templates) and
    remove old one.
    If duplicate is different from None and is not empty string, then it means that we must copy
    the code of the template whose filename is given in 'duplicate' for the code
    of our template.

    @param filename: filename of the template to update
    @param name: name to use for the template
    @param description: description to use for the template
    @param duplicate: the filename of a template that we want to copy
    @return: the filename of the modified format
    """
    if filename.endswith('.'+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
        format_template = bibformat_engine.get_format_template(filename, with_attributes=True)
        if duplicate is not None and duplicate != "":
            format_template_to_copy = bibformat_engine.get_format_template(duplicate)
            code = format_template_to_copy['code']
        else:
            code = format_template['code']
        if format_template['attrs']['name'] != name:
            # Name has changed, so update filename
            old_filename = filename
            old_path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + old_filename
            # Remove old one
            os.remove(old_path)
            (filename, name) = bibformat_engine.get_fresh_format_template_filename(name)

            # Change output formats that call this template.
            # The '--- old_filename' pattern is invariant, so compile once.
            output_pattern = re.compile("---(\s)*" + old_filename, re.IGNORECASE)
            output_formats = bibformat_engine.get_output_formats()
            for output_format_filename in output_formats:
                if can_read_output_format(output_format_filename) and \
                   can_write_output_format(output_format_filename):
                    output_path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + output_format_filename
                    # BUGFIX: file handles were "closed" with 'format.close'
                    # (missing parentheses); use context managers.
                    with open(output_path, 'r') as output_file:
                        output_text = output_file.read()
                    mod_output_text = output_pattern.sub("--- " + filename, output_text)
                    if output_text != mod_output_text:
                        with open(output_path, 'w') as output_file:
                            output_file.write(mod_output_text)

        description = cgi.escape(description)
        name = cgi.escape(name)

        # Write updated format template
        out = ""
        if not filename.endswith(".xsl"):
            out = """<name>%(name)s</name><description>%(description)s</description>""" % {'name': name,
                                                                                            'description': description,}
        out += code
        path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename
        with open(path, 'w') as template_file:
            template_file.write(out)

    bibformat_engine.clear_caches()
    return filename
def add_output_format():
    """
    Adds a new output format (mainly create file with unique name)

    @return: the code of the created format, or None if it could not be created
    """
    if not os.access(CFG_BIBFORMAT_OUTPUTS_PATH, os.W_OK):
        return None

    (filename, code) = bibformat_engine.get_fresh_output_format_filename("UNTLD")

    # Add entry in database
    bibformat_dblayer.add_output_format(code)
    bibformat_dblayer.set_output_format_name(code, "Untitled", lang="generic")
    bibformat_dblayer.set_output_format_content_type(code, "text/html")

    # Add (empty) file.  BUGFIX: 'format.close' (missing parentheses)
    # never closed the handle; use a context manager.
    path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename
    with open(path, 'w') as output_file:
        output_file.write("")

    return code
def delete_output_format(code):
    """
    Delete a format template given by its code
    if file is not writable, don't remove

    @param code: the 6 letters code of the output format to remove
    @return: None
    """
    if can_write_output_format(code):
        # Drop the database entry first, then the rules file on disk.
        bibformat_dblayer.remove_output_format(code)
        filename = bibformat_engine.resolve_output_format_filename(code)
        os.remove(CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename)
        bibformat_engine.clear_caches()
def update_output_format_rules(code, rules=[], default=""):
    """
    Saves rules inside output format given by code

    @param code: the code of the output format to update
    @param rules: the rules to apply for the output format
    @param default: the default template when no rule match
    @return: None
    """
    # Generate output format syntax.
    # Try to group rules by field: a new "tag <field>:" header is emitted
    # only when the field differs from the previous rule's field.
    previous_field = ""
    out = ""
    for rule in rules:
        field = rule["field"]
        value = rule["value"]
        template = rule["template"]
        if previous_field != field:
            out += "tag %s:\n" % field
        out += "%(value)s --- %(template)s\n" % {'value': value, 'template': template}
        previous_field = field
    out += "default: %s" % default
    filename = bibformat_engine.resolve_output_format_filename(code)
    path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename
    format = open(path, 'w')
    try:
        format.write(out)
    finally:
        # BUGFIX: was "format.close" without parentheses -> the file was
        # never actually closed.
        format.close()
    bibformat_engine.clear_caches()
def update_output_format_attributes(code, name="", description="", new_code="",
                                    content_type="", names_trans=[], visibility=1):
    """
    Saves name and description inside output format given by filename.

    If new_code already exist, use fresh code (we never overwrite other output).

    @param description: the new description
    @param name: the new name
    @param new_code: the new short code (== new bfo) of the output format
    @param code: the code of the output format to update
    @param names_trans: the translations in the same order as the languages from get_languages()
    @param content_type: the new content_type of the output format
    @param visibility: the visibility of the output format in the output formats list (public pages)
    @return: the filename of the modified format
    """
    # Persist the simple attributes.
    bibformat_dblayer.set_output_format_description(code, description)
    bibformat_dblayer.set_output_format_content_type(code, content_type)
    bibformat_dblayer.set_output_format_visibility(code, visibility)
    bibformat_dblayer.set_output_format_name(code, name, lang="generic")
    # Translated names arrive in the same order as the configured languages;
    # empty strings mean "no translation for this language".
    for i, lang in enumerate(language_list_long()):
        if names_trans[i] != "":
            bibformat_dblayer.set_output_format_name(code, names_trans[i], lang[0])
    new_code = new_code.upper()
    if code != new_code:
        # If code has changed, we must update filename with a new unique code
        old_filename = bibformat_engine.resolve_output_format_filename(code)
        old_path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + old_filename
        (new_filename, new_code) = bibformat_engine.get_fresh_output_format_filename(new_code)
        new_path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + new_filename
        os.rename(old_path, new_path)
        bibformat_dblayer.change_output_format_code(code, new_code)
    bibformat_engine.clear_caches()
    return new_code
def can_read_format_template(filename):
    """
    Tells whether we have read permission on the given format template.

    @param filename: name of a format template
    @return: True if template X{bft} can be read or not
    """
    return os.access(CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename, os.R_OK)
def can_read_output_format(bfo):
    """
    Tells whether we have read permission on the given output format.

    @param bfo: name of an output format
    @return: True if output format X{bfo} can be read or not
    """
    filename = bibformat_engine.resolve_output_format_filename(bfo)
    return os.access(CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename, os.R_OK)
def can_read_format_element(name):
    """
    Tells whether we have read permission on the given format element.

    @param name: name of a format element
    @return: True if element X{name} can be read or not
    """
    filename = bibformat_engine.resolve_format_element_filename(name)
    return os.access(CFG_BIBFORMAT_ELEMENTS_PATH + os.sep + filename, os.R_OK)
def can_write_format_template(bft):
    """
    Tells whether we have write permission on the given format template.

    An unreadable template is reported as non-writable too.

    @param bft: name of a format template
    @return: True if template X{bft} can be edited or not
    """
    return can_read_format_template(bft) and \
           os.access(CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + bft, os.W_OK)
def can_write_output_format(bfo):
    """
    Tells whether we have write permission on the given output format.

    An unreadable output format is reported as non-writable too.

    @param bfo: name of an output format
    @return: True if output format X{bfo} can be edited or not
    """
    if not can_read_output_format(bfo):
        return False
    filename = bibformat_engine.resolve_output_format_filename(bfo)
    return os.access(CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename, os.W_OK)
def can_write_etc_bibformat_dir():
    """
    Returns true if we can write in etc/bibformat dir.

    @return: True if can write, or False
    """
    return os.access(CFG_ETCDIR + os.sep + "bibformat", os.W_OK)
def get_outputs_that_use_template(filename):
    """
    Returns a list of output formats that call the given format template.
    The returned output formats also give their dependencies on tags.

    We don't return the complete output formats but some reference to
    them (filename + names)::

        [ {'filename':"filename_1.bfo"
           'names': {'en':"a name", 'fr': "un nom", 'generic':"a name"}
           'tags': ['710__a', '920__']
          },
          ...
        ]

    @param filename: a format template filename
    @return: output formats references sorted by (generic) name
    """
    output_formats_list = {}
    output_formats = bibformat_engine.get_output_formats(with_attributes=True)
    for output_format in output_formats:
        name = output_formats[output_format]['attrs']['names']['generic']
        # First look at default template, and add it if necessary
        if output_formats[output_format]['default'] == filename:
            output_formats_list[name] = {'filename': output_format,
                                         'names': output_formats[output_format]['attrs']['names'],
                                         'tags': []}
        # Second look at each rule.
        # BUGFIX: 'tags' is now reset for every output format. It used to be
        # initialised once before the outer loop, so tag dependencies of
        # earlier output formats leaked into later ones.
        tags = []
        found = False
        for rule in output_formats[output_format]['rules']:
            if rule['template'] == filename:
                found = True
                tags.append(rule['field'])  # Also build dependencies on tags
        # Finally add dependency on template from rule (overwrite default
        # dependency, which is weaker in term of tag)
        if found:
            output_formats_list[name] = {'filename': output_format,
                                         'names': output_formats[output_format]['attrs']['names'],
                                         'tags': tags}
    keys = output_formats_list.keys()
    keys.sort()
    return map(output_formats_list.get, keys)
def get_elements_used_by_template(filename):
    """
    Returns a list of format elements that are called by the given format template.
    The returned elements also give their dependencies on tags.

    Dependencies on tag might be approximative. See get_tags_used_by_element()
    doc string.

    We must handle usage of bfe_field in a special way if we want to retrieve
    used tag: used tag is given in "tag" parameter, not inside element code.

    The list is returned sorted by name::

        [ {'filename':"filename_1.py"
           'name':"filename_1"
           'tags': ['710__a', '920__']
          },
          ...
        ]

    @param filename: a format template filename
    @return: elements sorted by name
    """
    format_elements = {}
    format_template = bibformat_engine.get_format_template(filename=filename, with_attributes=True)
    code = format_template['code']
    # Scan the template code for element calls (pattern_tag matches the
    # <BFE_...> call syntax and captures "function_name" and "params").
    format_elements_iter = bibformat_engine.pattern_tag.finditer(code)
    for result in format_elements_iter:
        function_name = result.group("function_name").lower()
        # Regular elements: record each one only once, resolving its file
        # and collecting the tags it uses.
        if function_name is not None and not format_elements.has_key(function_name) \
               and not function_name == "field":
            filename = bibformat_engine.resolve_format_element_filename("BFE_"+function_name)
            if filename is not None:
                tags = get_tags_used_by_element(filename)
                format_elements[function_name] = {'name':function_name.lower(),
                                                  'filename':filename,
                                                  'tags':tags}
        elif function_name == "field":
            # Handle bfe_field element in a special way
            if not format_elements.has_key(function_name):
                #Indicate usage of bfe_field if not already done
                filename = bibformat_engine.resolve_format_element_filename("BFE_"+function_name)
                format_elements[function_name] = {'name':function_name.lower(),
                                                  'filename':filename,
                                                  'tags':[]}
            # Retrieve value of parameter "tag"
            all_params = result.group('params')
            function_params_iterator = bibformat_engine.pattern_function_params.finditer(all_params)
            for param_match in function_params_iterator:
                name = param_match.group('param')
                if name == "tag":
                    value = param_match.group('value')
                    if not value in format_elements[function_name]['tags']:
                        format_elements[function_name]['tags'].append(value)
                    # Stop after the "tag" parameter has been processed.
                    break
    keys = format_elements.keys()
    keys.sort()
    return map(format_elements.get, keys)
# Format Elements Dependencies
##
def get_tags_used_by_element(filename):
    """
    Returns a list of tags used by given format element

    APPROXIMATIVE RESULTS: the tag are retrieved in field(), fields()
    and control_field() function. If they are used computed, or saved
    in a variable somewhere else, they are not retrieved

    @TODO: There is room for improvements. For example catch
    call to BibRecord functions.

    @param filename: a format element filename
    @return: tags sorted by value
    """
    tags = {}
    format_element = bibformat_engine.get_format_element(filename)
    if format_element is None:
        return []
    elif format_element['type'] == "field":
        # "field" type elements declare their tags as attributes directly.
        tags = format_element['attrs']['tags']
        return tags
    filename = bibformat_engine.resolve_format_element_filename(filename)
    path = CFG_BIBFORMAT_ELEMENTS_PATH + os.sep + filename
    format = open(path, 'r')
    try:
        code = format.read()
    finally:
        # BUGFIX: was "format.close" without parentheses -> the file handle
        # was leaked; actually close the file.
        format.close()
    tags_pattern = re.compile('''
    (field|fields|control_field)\s* #Function call
    \(\s*                           #Opening parenthesis
    [\'"]+                          #Single or double quote
    (?P<tag>.+?)                    #Tag
    [\'"]+\s*                       #Single or double quote
    (,[^\)]+)*                      #Additional function param
    \)                              #Closing parenthesis
    ''', re.VERBOSE | re.MULTILINE)
    tags_iter = tags_pattern.finditer(code)
    for result in tags_iter:
        # Dict keyed by tag de-duplicates repeated tags.
        tags[result.group("tag")] = result.group("tag")
    return tags.values()
def get_templates_that_use_element(name):
    """
    Returns a list of format templates that call the given format element.
    The returned format templates also give their dependencies on tags::

        [ {'filename':"filename_1.bft"
           'name': "a name"
           'tags': ['710__a', '920__']
          },
          ...
        ]

    @param name: a format element name
    @return: templates sorted by name
    """
    format_templates = {}
    # Walk every template file and keep those that call the element.
    for possible_template in os.listdir(CFG_BIBFORMAT_TEMPLATES_PATH):
        if not possible_template.endswith(CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
            continue
        used = [elem['name'].lower()
                for elem in get_elements_used_by_template(possible_template)]
        try:
            # Raises ValueError when the element is not used -> skip template.
            used.index(name.lower())
            format_template = bibformat_engine.get_format_template(filename=possible_template, with_attributes=True)
            template_name = format_template['attrs']['name']
            format_templates[template_name] = {'name': template_name,
                                               'filename': possible_template}
        except:
            # Element absent, or template could not be loaded: ignore.
            pass
    keys = format_templates.keys()
    keys.sort()
    return map(format_templates.get, keys)
# Output Formats Dependencies
##
def get_templates_used_by_output(code):
    """
    Returns a list of templates used inside an output format give by its code
    The returned format templates also give their dependencies on elements and tags::

        [ {'filename':"filename_1.bft"
           'name': "a name"
           'elements': [{'filename':"filename_1.py", 'name':"filename_1", 'tags': ['710__a', '920__']
                        }, ...]
          },
          ...
        ]

    @param code: output format code
    @return: templates sorted by name
    """
    format_templates = {}
    output_format = bibformat_engine.get_output_format(code, with_attributes=True)
    # Collect templates from the rules, plus the default template if set.
    filenames = [rule['template'] for rule in output_format['rules']]
    if output_format['default'] != "":
        filenames.append(output_format['default'])
    for filename in filenames:
        template = bibformat_engine.get_format_template(filename, with_attributes=True)
        name = template['attrs']['name']
        format_templates[name] = {'name': name,
                                  'filename': filename,
                                  'elements': get_elements_used_by_template(filename)}
    keys = format_templates.keys()
    keys.sort()
    return map(format_templates.get, keys)
# Validation tools
##
def perform_request_format_validate(ln=CFG_SITE_LANG, bfo=None, bft=None, bfe=None):
    """
    Returns a page showing the status of an output format or format
    template or format element. This page is called from output
    formats management page or format template management page or
    format elements documentation.

    The page only shows the status of one of the format, depending on
    the specified one. If multiple are specified, shows the first one.

    @param ln: language
    @param bfo: an output format 6 chars code
    @param bft: a format element filename
    @param bfe: a format element name
    @return: HTML markup
    """
    # BUGFIX: initialise 'messages' so that calling this function without
    # any of bfo/bft/bfe no longer raises NameError; it now renders an
    # empty message list instead.
    messages = None
    if bfo is not None:
        messages = check_output_format(bfo)
    elif bft is not None:
        messages = check_format_template(bft, checking=1)
    elif bfe is not None:
        messages = check_format_element(bfe)
    if messages is None:
        messages = []
    messages = [encode_for_xml(message) for message in messages]
    return bibformat_templates.tmpl_admin_validate_format(ln, messages)
def check_output_format(code):
    """
    Returns the list of errors in the output format given by code
    The errors are the formatted errors defined in bibformat_config.py file.

    @param code: the 6 chars code of the output format to check
    @return: a list of errors
    """
    # NOTE: each problem is raised as InvenioBibFormatError and immediately
    # caught, so that register_exception() records a traceback while the
    # human-readable message is accumulated in 'errors'.
    _ = gettext_set_language(CFG_SITE_LANG)
    errors = []
    filename = bibformat_engine.resolve_output_format_filename(code)
    if can_read_output_format(code):
        path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename
        format = open(path)
        current_tag = ''
        # 'i' is the 1-based line number used in the error messages.
        i = 0
        for line in format:
            i += 1
            if line.strip() == "":
                # Ignore blank lines
                continue
            clean_line = line.rstrip("\n\r ") #remove spaces and eol
            if line.strip().endswith(":") or (line.strip().lower().startswith("tag") and line.find('---') == -1):
                # Check tag
                if not clean_line.endswith(":"):
                    # Column misses at the end of line
                    try:
                        raise InvenioBibFormatError(_('Tag specification "%s" must end with column ":" at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                if not clean_line.lower().startswith("tag"):
                    # Tag keyword is missing
                    try:
                        raise InvenioBibFormatError(_('Tag specification "%s" must start with "tag" at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                elif not clean_line.startswith("tag"):
                    # Tag was not lower case
                    try:
                        raise InvenioBibFormatError(_('"tag" must be lowercase in "%s" at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                clean_line = clean_line.rstrip(": ") #remove : and spaces at the end of line
                current_tag = "".join(clean_line.split()[1:]).strip() #the tag starts at second position
                if len(clean_line.split()) > 2: #We should only have 'tag' keyword and tag
                    try:
                        raise InvenioBibFormatError(_('Should be "tag field_number:" at line %s.') % i)
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                else:
                    if len(check_tag(current_tag)) > 0:
                        # Invalid tag
                        try:
                            raise InvenioBibFormatError(_('Invalid tag "%s" at line %s.') % (current_tag, i))
                        except InvenioBibFormatError, exc:
                            register_exception()
                            errors.append(exc.message)
                    if not clean_line.startswith("tag"):
                        try:
                            raise InvenioBibFormatError(_('Should be "tag field_number:" at line %s.') % i)
                        except InvenioBibFormatError, exc:
                            register_exception()
                            errors.append(exc.message)
            elif line.find('---') != -1:
                # Check condition
                if current_tag == "":
                    try:
                        raise InvenioBibFormatError(_('Condition "%s" is outside a tag specification at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                words = line.split('---')
                if len(words) != 2:
                    try:
                        raise InvenioBibFormatError(_('Condition "%s" can only have a single separator --- at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                # The referenced template must exist on disk.
                template = words[-1].strip()
                path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + template
                if not os.path.exists(path):
                    try:
                        raise InvenioBibFormatError(_('Template "%s" does not exist at line %s.') % (template, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
            elif line.find(':') != -1 or (line.strip().lower().startswith("default") and line.find('---') == -1):
                # Check default template
                clean_line = line.strip()
                if line.find(':') == -1:
                    # Column misses after default
                    try:
                        raise InvenioBibFormatError(_('Missing column ":" after "default" in "%s" at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                if not clean_line.startswith("default"):
                    # Default keyword is missing
                    try:
                        raise InvenioBibFormatError(_('Default template specification "%s" must start with "default :" at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                # NOTE(review): this condition duplicates the one just above;
                # the first check was probably meant to use
                # .lower().startswith("default") -- confirm against history.
                if not clean_line.startswith("default"):
                    # Default was not lower case
                    try:
                        raise InvenioBibFormatError(_('"default" keyword must be lowercase in "%s" at line %s.') % (line, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                default = "".join(line.split(':')[1]).strip()
                path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + default
                if not os.path.exists(path):
                    try:
                        raise InvenioBibFormatError(_('Template "%s" does not exist at line %s.') % (default, i))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
            else:
                # Check others
                try:
                    raise InvenioBibFormatError(_('Line %s could not be understood at line %s.') % (line, i))
                except InvenioBibFormatError, exc:
                    register_exception()
                    errors.append(exc.message)
    else:
        # Output format file unreadable.
        # NOTE(review): error string says "cannot not be read" -- double
        # negative, likely a typo in the original message catalogue.
        try:
            raise InvenioBibFormatError(_('Output format %s cannot not be read. %s') % (filename, ""))
        except InvenioBibFormatError, exc:
            register_exception()
            errors.append(exc.message)
    return errors
def check_format_template(filename, checking=0):
    """
    Returns the list of errors in the format template given by its filename
    The errors are the formatted errors defined in bibformat_config.py file.

    @param filename: the filename of the format template to check
    @param checking: the level of checking (0:basic, >=1 extensive (time-consuming))
    @return: a list of errors
    """
    # NOTE: problems are raised as InvenioBibFormatError and immediately
    # caught so register_exception() logs a traceback while the message
    # accumulates in 'errors'.
    errors = []
    _ = gettext_set_language(CFG_SITE_LANG)
    if can_read_format_template(filename):#Can template be read?
        if filename.endswith('.'+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
            #format_template = bibformat_engine.get_format_template(filename, with_attributes=True)
            format = open("%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename))
            code = format.read()
            format.close()
            # Look for name
            match = bibformat_engine.pattern_format_template_name.search(code)
            if match is None:#Is tag <name> defined in template?
                try:
                    raise InvenioBibFormatError(_('Could not find a name specified in tag "<name>" inside format template %s.') % filename)
                except InvenioBibFormatError, exc:
                    register_exception()
                    errors.append(exc.message)
            # Look for description
            match = bibformat_engine.pattern_format_template_desc.search(code)
            if match is None:#Is tag <description> defined in template?
                try:
                    raise InvenioBibFormatError(_('Could not find a description specified in tag "<description>" inside format template %s.') % filename)
                except InvenioBibFormatError, exc:
                    register_exception()
                    errors.append(exc.message)
            format_template = bibformat_engine.get_format_template(filename, with_attributes=False)
            code = format_template['code']
            # Look for calls to format elements
            # Check existence of elements and attributes used in call
            elements_call = bibformat_engine.pattern_tag.finditer(code)
            for element_match in elements_call:
                element_name = element_match.group("function_name")
                # CAUTION: 'filename' is rebound here to the element's
                # filename, shadowing the template filename parameter.
                filename = bibformat_engine.resolve_format_element_filename(element_name)
                if filename is None and not bibformat_dblayer.tag_exists_for_name(element_name): #Is element defined?
                    try:
                        raise InvenioBibFormatError(_('Format template %s calls undefined element "%s".') % (filename, element_name))
                    except InvenioBibFormatError, exc:
                        register_exception()
                        errors.append(exc.message)
                else:
                    format_element = bibformat_engine.get_format_element(element_name, with_built_in_params=True)
                    if format_element is None:#Can element be loaded?
                        if not can_read_format_element(element_name):
                            try:
                                raise InvenioBibFormatError(_('Format template %s calls unreadable element "%s". Check element file permissions.') % (filename, element_name))
                            except InvenioBibFormatError, exc:
                                register_exception()
                                errors.append(exc.message)
                        else:
                            try:
                                raise InvenioBibFormatError(_('Cannot load element "%s" in template %s. Check element code.') % (element_name, filename))
                            except InvenioBibFormatError, exc:
                                register_exception()
                                errors.append(exc.message)
                    else:
                        # Are the parameters used defined in element?
                        params_call = bibformat_engine.pattern_function_params.finditer(element_match.group())
                        all_params = {}
                        for param_match in params_call:
                            param = param_match.group("param")
                            value = param_match.group("value")
                            all_params[param] = value
                            allowed_params = []
                            # Built-in params
                            for allowed_param in format_element['attrs']['builtin_params']:
                                allowed_params.append(allowed_param['name'])
                            # Params defined in element
                            for allowed_param in format_element['attrs']['params']:
                                allowed_params.append(allowed_param['name'])
                            if not param in allowed_params:
                                try:
                                    raise InvenioBibFormatError(_('Format element %s uses unknown parameter "%s" in format template %s.') % (element_name, param, filename))
                                except InvenioBibFormatError, exc:
                                    register_exception()
                                    errors.append(exc.message)
                        # The following code is too much time consuming. Only do where really requested
                        if checking > 0:
                            # Try to evaluate, with any object and pattern
                            recIDs = perform_request_search()
                            if len(recIDs) > 0:
                                recID = recIDs[0]
                                bfo = bibformat_engine.BibFormatObject(recID, search_pattern="Test")
                                (result, errors_) = bibformat_engine.eval_format_element(format_element, bfo, all_params, verbose=7)
                                errors.extend(errors_)
    else:# Template cannot be read
        try:
            raise InvenioBibFormatError(_('Could not read format template named %s. %s') % (filename, ""))
        except InvenioBibFormatError, exc:
            register_exception()
            errors.append(exc.message)
    return errors
def check_format_element(name):
    """
    Returns the list of errors in the format element given by its name
    The errors are the formatted errors defined in bibformat_config.py file.

    @param name: the name of the format element to check
    @return: a list of errors
    """
    errors = []
    _ = gettext_set_language(CFG_SITE_LANG)
    filename = bibformat_engine.resolve_format_element_filename(name)
    if filename is not None:#Can element be found in files?
        if can_read_format_element(name):#Can element be read?
            # Try to load
            try:
                module_name = filename
                if module_name.endswith(".py"):
                    module_name = module_name[:-3]
                module = __import__("invenio.modules.formatter.format_elements."+module_name)
                try:
                    function_format  = module.bibformat_elements.__dict__[module_name].format_element
                except AttributeError, e:
                    # Legacy elements expose "format" instead of
                    # "format_element" as entry point.
                    function_format  = module.bibformat_elements.__dict__[module_name].format
                # Try to evaluate, with any object and pattern
                recIDs = perform_request_search()
                if len(recIDs) > 0:
                    recID = recIDs[0]
                    bfo = bibformat_engine.BibFormatObject(recID, search_pattern="Test")
                    element = bibformat_engine.get_format_element(name)
                    (result, errors_) = bibformat_engine.eval_format_element(element, bfo, verbose=7)
                    errors.extend(errors_)
            except Exception, e:
                # Any failure (import, attribute lookup, evaluation) is
                # reported as a single element error with a logged traceback.
                try:
                    raise InvenioBibFormatError(_('Error in format element %s. %s.') % (name, e))
                except InvenioBibFormatError, exc:
                    register_exception()
                    errors.append(exc.message)
        else:
            try:
                raise InvenioBibFormatError(_('Format element %s cannot not be read. %s') % (filename, ""))
            except InvenioBibFormatError, exc:
                register_exception()
                errors.append(exc.message)
    elif bibformat_dblayer.tag_exists_for_name(name):#Can element be found in database?
        # Database-defined tag elements need no file checks.
        pass
    else:
        try:
            raise InvenioBibFormatError(_('Could not find format element named %s.') % name)
        except InvenioBibFormatError, exc:
            register_exception()
            errors.append(exc.message)
    return errors
def check_tag(tag):
    """
    Checks the validity of a tag.

    Currently a stub: no validation rules are implemented, so every tag
    is reported as valid (empty error list).

    @param tag: tag to check
    @return: list of errors for the tag
    """
    return []
def perform_request_dreamweaver_floater():
    """
    Returns a floater for Dreamweaver with all Format Elements available.

    @return: HTML markup (according to Dreamweaver specs)
    """
    # Fetch every format element, sorted by name.
    elements = bibformat_engine.get_format_elements(with_built_in_params=True)
    keys = elements.keys()
    keys.sort()
    elements = map(elements.get, keys)

    def filter_elem(element):
        """Keep element if is string representation contains all keywords of search_doc_pattern,
        and if its name does not start with a number (to remove 'garbage' from elements in tags table)"""
        return not (element['type'] != 'python' and
                    element['attrs']['name'][0] in '0123456789')

    elements = filter(filter_elem, elements)
    return bibformat_templates.tmpl_dreamweaver_floater(CFG_SITE_LANG, elements)
diff --git a/invenio/legacy/bibformat/bibreformat.py b/invenio/legacy/bibformat/bibreformat.py
index 917e0a4de..ba6407c3d 100644
--- a/invenio/legacy/bibformat/bibreformat.py
+++ b/invenio/legacy/bibformat/bibreformat.py
@@ -1,618 +1,618 @@
## -*- mode: python; coding: utf-8; -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Call BibFormat engine and create HTML brief (and other) formats cache for
bibliographic records."""
__revision__ = "$Id$"
import sys
from invenio.base.factory import with_app_context
try:
from invenio.dbquery import run_sql
from invenio.config import \
CFG_SITE_URL,\
CFG_TMPDIR,\
CFG_BINDIR
from invenio.intbitset import intbitset
from invenio.search_engine import perform_request_search, search_pattern
from invenio.search_engine import print_record
from invenio.bibrank_citation_searcher import get_cited_by
from invenio.bibrank_citation_indexer import get_bibrankmethod_lastupdate
from invenio.modules.formatter import format_record
from invenio.modules.formatter.config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT
from invenio.shellutils import split_cli_ids_arg
from invenio.bibtask import task_init, write_message, task_set_option, \
task_get_option, task_update_progress, task_has_option, \
task_low_level_submission, task_sleep_now_if_required, \
task_get_task_param
import os
import time
import zlib
from datetime import datetime
except ImportError, e:
print "Error: %s" % e
sys.exit(1)
def fetch_last_updated(format):
    """Return the last_updated timestamp stored for the given format code.

    Falls back to 1900-01-01 when the column holds NULL.
    """
    row = run_sql("SELECT last_updated FROM format WHERE code = %s",
                  (format.lower(), ))
    return row[0][0] or datetime(year=1900, month=1, day=1)
def store_last_updated(format, update_date):
    """Persist update_date as the format's last_updated timestamp.

    Only moves the timestamp forward (or sets it when NULL), never backward.
    """
    iso_date = update_date.strftime("%Y-%m-%d %H:%M:%S")
    run_sql("UPDATE format SET last_updated = %s "
            "WHERE code = %s AND (last_updated < %s or last_updated IS NULL)",
            (iso_date, format.lower(), iso_date))
### run the bibreformat task bibsched scheduled
###
@with_app_context()
def bibreformat_task(fmt, sql, sql_queries, cds_query, process_format, process, recids):
    """
    BibReformat main task

    @param fmt: output format to use
    @param sql: dictionary with pre-created sql queries for various cases (for selecting records). Some of these queries will be picked depending on the case
    @param sql_queries: a list of sql queries to be executed to select records to reformat.
    @param cds_query: a search query to be executed to select records to reformat
    @param process_format: whether to also select records missing the format cache ('-without')
    @param process: whether to actually reformat the selected records
    @param recids: a list of record IDs to reformat
    @return: None
    """
    write_message("Processing format %s" % fmt)

    # os.times()[4] is elapsed real time; used for the final statistics.
    t1 = os.times()[4]

    start_date = datetime.now()

    ### Query the database
    ###
    task_update_progress('Fetching records to process')
    if process_format: # '-without' parameter
        write_message("Querying database for records without cache...")
        without_format = without_fmt(sql)

    recIDs = intbitset(recids)

    if cds_query['field']      != "" or  \
       cds_query['collection'] != "" or  \
       cds_query['pattern']    != "":

        write_message("Querying database (CDS query)...")

        if cds_query['collection'] == "":
            # use search_pattern() whenever possible, as it can search
            # even in private collections
            res = search_pattern(p=cds_query['pattern'],
                                 f=cds_query['field'],
                                 m=cds_query['matching'])
        else:
            # use perform_request_search when '-c' argument has been
            # defined, as it is not supported by search_pattern()
            res = intbitset(perform_request_search(req=None, of='id',
                                                   c=cds_query['collection'],
                                                   p=cds_query['pattern'],
                                                   f=cds_query['field']))
        recIDs |= res

    for sql_query in sql_queries:
        write_message("Querying database (%s) ..." % sql_query, verbose=2)
        recIDs |= intbitset(run_sql(sql_query))

    if fmt == "HDREF" and recIDs:
        # HDREF represents the references tab
        # the tab needs to be recomputed not only when the record changes
        # but also when one of the citations changes
        latest_bibrank_run = get_bibrankmethod_lastupdate('citation')
        start_date = latest_bibrank_run
        sql = """SELECT id, modification_date FROM bibrec
                 WHERE id in (%s)""" % ','.join(str(r) for r in recIDs)

        def check_date(mod_date):
            # Keep only records not modified since the last citation run;
            # newer ones are already covered by the regular selection.
            return mod_date < latest_bibrank_run
        recIDs = intbitset([recid for recid, mod_date in run_sql(sql) \
                            if check_date(mod_date)])
        for r in recIDs:
            # Records citing a changed record need their tab rebuilt too.
            recIDs |= intbitset(get_cited_by(r))

    ### list of corresponding record IDs was retrieved
    ### now format the selected records

    if process_format:
        write_message("Records to be processed: %d" % (len(recIDs) \
                                               + len(without_format)))
        write_message("Out of it records without existing cache: %d" % len(without_format))
    else:
        write_message("Records to be processed: %d" % (len(recIDs)))

    ### Initialize main loop

    total_rec   = 0     # Total number of records
    tbibformat  = 0     # time taken up by external call
    tbibupload  = 0     # time taken up by external call

    ### Iterate over all records prepared in lists I (option)
    if process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this
                                            # when migration from php to
                                            # python bibformat is done
            (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_old(recIDs,
                                                                         fmt)
        else:
            (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_new(recIDs,
                                                                         fmt)
        total_rec += total_rec_1
        tbibformat += tbibformat_1
        tbibupload += tbibupload_1

    ### Iterate over all records prepared in list II (no_format)
    if process_format and process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this
                                            # when migration from php to
                                            # python bibformat is done
            (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_old(without_format,
                                                                         fmt)
        else:
            (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_new(without_format,
                                                                         fmt)
        total_rec += total_rec_2
        tbibformat += tbibformat_2
        tbibupload += tbibupload_2

    ### Store last run time
    if task_has_option("last"):
        write_message("storing run date to %s" % start_date)
        store_last_updated(fmt, start_date)

    ### Final statistics

    t2 = os.times()[4]

    elapsed = t2 - t1
    message = "total records processed: %d" % total_rec
    write_message(message)

    message = "total processing time: %2f sec" % elapsed
    write_message(message)

    message = "Time spent on external call (os.system):"
    write_message(message)

    message = " bibformat: %2f sec" % tbibformat
    write_message(message)

    message = " bibupload: %2f sec" % tbibupload
    write_message(message)
def check_validity_input_formats(input_formats):
    """
    Check the validity of every requested output format.

    @param input_formats: list of given formats
    @type input_formats: list
    @return: the first invalid input format found (lower-cased), or '' if all are valid
    @rtype: string
    """
    from invenio.search_engine import get_available_output_formats
    # Collect the codes of every format the engine knows about.
    known_codes = [entry['value'] for entry in get_available_output_formats()]
    # Return the first requested format that is not known (case-insensitive).
    for candidate in input_formats:
        lowered = candidate.lower()
        if lowered not in known_codes:
            return lowered
    return ''
### Identify recIDs of records with missing format
###
def without_fmt(queries, chunk_size=2000):
    """
    List record IDs to be reformatted, i.e. records not yet having the
    specified format cached in bibfmt.

    @param queries: a dictionary with sql queries to pick from (key 'missing' is used)
    @param chunk_size: how many record IDs to scan per SQL round-trip
    @return: an intbitset of record IDs without pre-created format cache
    """
    missing_sql = queries['missing']
    found = intbitset()
    highest_id = run_sql("SELECT max(id) FROM bibrec")[0][0]
    # Walk the id space in fixed-size windows to keep each query cheap.
    lower = 1
    while lower <= highest_id:
        found += intbitset(run_sql(missing_sql, (lower, lower + chunk_size)))
        lower += chunk_size
    return found
### Bibreformat all selected records (using new python bibformat)
### (see iterate_over_old further down)
def iterate_over_new(list, fmt):
    """
    Format each record in the given list (new python bibformat) and store
    the zlib-compressed result in the ``bibfmt`` cache table.

    @param list: the list of record IDs to format
    @param fmt: the output format to use
    @return: tuple (total number of records, time taken to format, time taken to insert)
    """
    tbibformat = 0  # time spent formatting records in this process
    tbibupload = 0  # always 0 here; kept so the return shape matches iterate_over_old()
    tot = len(list)
    count = 0
    for recID in list:
        t1 = os.times()[4]
        start_date = time.strftime('%Y-%m-%d %H:%M:%S')
        # BUGFIX/perf: format the record only once and reuse the result.
        # The previous code called format_record() twice per record
        # (once just to log the output, once to compress and store it).
        formatted_record = format_record(recID, fmt, on_the_fly=True)
        write_message(formatted_record)
        run_sql('REPLACE LOW_PRIORITY INTO bibfmt (id_bibrec, format, last_updated, value) VALUES (%s, %s, %s, %s)',
                (recID, fmt, start_date, zlib.compress(formatted_record)))
        t2 = os.times()[4]
        tbibformat += (t2 - t1)
        count += 1
        if (count % 100) == 0:
            write_message(" ... formatted %s records out of %s" % (count, tot))
            task_update_progress('Formatted %s out of %s' % (count, tot))
            task_sleep_now_if_required(can_stop_too=True)
    # Emit a final progress line unless the loop ended exactly on a multiple of 100.
    if (tot % 100) != 0:
        write_message(" ... formatted %s records out of %s" % (count, tot))
    return (tot, tbibformat, tbibupload)
def iterate_over_old(list, fmt):
    """
    Iterate over list of IDs, formatting them in chunks through the external
    (old, PHP-era) bibformat executable and submitting each chunk to bibupload.

    @param list: the list of record IDs to format
    @param fmt: the output format to use
    @return: tuple (total number of records, time taken to format, time taken to insert)
    """
    n_rec = 0           # records accumulated in the current chunk
    n_max = 10000       # flush the chunk once this many records are buffered
    xml_content = ''    # hold the contents
    tbibformat = 0      # time taken up by external call
    tbibupload = 0      # time taken up by external call
    total_rec = 0       # Number of formatted records
    for record in list:
        n_rec = n_rec + 1
        total_rec = total_rec + 1
        message = "Processing record: %d" % (record)
        write_message(message, verbose=9)
        query = "id=%d&of=xm" % (record)
        count = 0
        # Fetch the record's MARCXML; retry up to 10 times (10 s apart)
        # while it comes back empty.
        contents = print_record(record, 'xm')
        while (contents == "") and (count < 10):
            contents = print_record(record, 'xm')
            count = count + 1
            time.sleep(10)
        if count == 10:
            sys.stderr.write("Failed to download %s from %s after 10 attempts... terminating" % (query, CFG_SITE_URL))
            sys.exit(0)
        xml_content = xml_content + contents
        if xml_content:
            if n_rec >= n_max:
                # Chunk is full: write it to a temp file and hand it to the
                # external bibformat / bibupload pipeline.
                finalfilename = "%s/rec_fmt_%s.xml" % (CFG_TMPDIR, time.strftime('%Y%m%d_%H%M%S'))
                filename = "%s/bibreformat.xml" % CFG_TMPDIR
                filehandle = open(filename ,"w")
                filehandle.write(xml_content)
                filehandle.close()
                ### bibformat external call
                ###
                task_sleep_now_if_required(can_stop_too=True)
                t11 = os.times()[4]
                message = "START bibformat external call"
                write_message(message, verbose=9)
                # NOTE(review): the command is built by string interpolation and
                # run through os.system; fmt comes from validated task options,
                # but subprocess with an argument list would be safer.
                command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (CFG_BINDIR, fmt.upper(), CFG_TMPDIR, finalfilename, CFG_TMPDIR)
                os.system(command)
                t22 = os.times()[4]
                message = "END bibformat external call (time elapsed:%2f)" % (t22-t11)
                write_message(message, verbose=9)
                task_sleep_now_if_required(can_stop_too=True)
                tbibformat = tbibformat + (t22 - t11)
                ### bibupload external call
                ###
                t11 = os.times()[4]
                message = "START bibupload external call"
                write_message(message, verbose=9)
                task_id = task_low_level_submission('bibupload', 'bibreformat', '-f', finalfilename)
                write_message("Task #%s submitted" % task_id)
                t22 = os.times()[4]
                message = "END bibupload external call (time elapsed:%2f)" % (t22-t11)
                write_message(message, verbose=9)
                tbibupload = tbibupload + (t22- t11)
                # Start a fresh chunk.
                n_rec = 0
                xml_content = ''
    ### Process the last re-formated chunk
    ###
    if n_rec > 0:
        write_message("Processing last record set (%d)" % n_rec, verbose=9)
        finalfilename = "%s/rec_fmt_%s.xml" % (CFG_TMPDIR, time.strftime('%Y%m%d_%H%M%S'))
        filename = "%s/bibreformat.xml" % CFG_TMPDIR
        filehandle = open(filename, "w")
        filehandle.write(xml_content)
        filehandle.close()
        ### bibformat external call
        ###
        t11 = os.times()[4]
        message = "START bibformat external call"
        write_message(message, verbose=9)
        command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (CFG_BINDIR, fmt.upper(), CFG_TMPDIR, finalfilename, CFG_TMPDIR)
        os.system(command)
        t22 = os.times()[4]
        message = "END bibformat external call (time elapsed:%2f)" % (t22 - t11)
        write_message(message, verbose=9)
        tbibformat = tbibformat + (t22 - t11)
        ### bibupload external call
        ###
        t11 = os.times()[4]
        message = "START bibupload external call"
        write_message(message, verbose=9)
        task_id = task_low_level_submission('bibupload', 'bibreformat', '-f', finalfilename)
        write_message("Task #%s submitted" % task_id)
        t22 = os.times()[4]
        message = "END bibupload external call (time elapsed:%2f)" % (t22 - t11)
        write_message(message, verbose=9)
        tbibupload = tbibupload + (t22 - t11)
    return (total_rec, tbibformat, tbibupload)
def task_run_core():
    """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call."""
    ## initialize parameters
    if task_get_option('format'):
        fmts = task_get_option('format')
    else:
        fmts = 'HB' # default value if no format option given
    # One full pass (selection + reformatting) per requested output format.
    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)
        # NOTE(review): fmt and the date are interpolated directly into the
        # SQL text rather than bound as parameters; both come from validated
        # task options, but parameterized queries would be more robust.
        sql = {
            "all" : """SELECT br.id FROM bibrec AS br, bibfmt AS bf
                       WHERE bf.id_bibrec = br.id AND bf.format = '%s'""" % fmt,
            "last": """SELECT br.id FROM bibrec AS br
                       INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id
                       WHERE br.modification_date >= '%(last_updated)s'
                       AND bf.format='%(format)s'
                       AND bf.last_updated < br.modification_date""" \
                            % {'format': fmt,
                               'last_updated': last_updated.strftime('%Y-%m-%d %H:%M:%S')},
            "missing" : """SELECT br.id
                           FROM bibrec as br
                           LEFT JOIN bibfmt as bf
                           ON bf.id_bibrec = br.id AND bf.format ='%s'
                           WHERE bf.id_bibrec IS NULL
                           AND br.id BETWEEN %%s AND %%s
                        """ % fmt,
        }
        sql_queries = []
        cds_query = {}
        if task_has_option("all"):
            sql_queries.append(sql['all'])
        if task_has_option("last"):
            sql_queries.append(sql['last'])
        # Build the search-engine query dict; empty string means "not given".
        if task_has_option("collection"):
            cds_query['collection'] = task_get_option('collection')
        else:
            cds_query['collection'] = ""
        if task_has_option("field"):
            cds_query['field'] = task_get_option('field')
        else:
            cds_query['field'] = ""
        if task_has_option("pattern"):
            cds_query['pattern'] = task_get_option('pattern')
        else:
            cds_query['pattern'] = ""
        if task_has_option("matching"):
            cds_query['matching'] = task_get_option('matching')
        else:
            cds_query['matching'] = ""
        if task_has_option("recids"):
            recids = list(split_cli_ids_arg(task_get_option('recids')))
        else:
            recids = []
        ### sql commands to be executed during the script run
        ###
        bibreformat_task(fmt, sql, sql_queries, cds_query, task_has_option('without'), not task_has_option('noprocess'), recids)
    return True
def main():
    """Construct the BibReformat bibtask (CLI entry point) and register its
    options, help text and callbacks with the BibSched task machinery."""
    task_init(authorization_action='runbibformat',
              authorization_msg="BibReformat Task Submission",
              description="""
BibReformat formats the records and saves the produced outputs for
later retrieval.

BibReformat is usually run periodically via BibSched in order to (1)
format new records in the database and to (2) reformat records for
which the meta data has been modified.

BibReformat has to be run manually when (3) format config files have
been modified, in order to see the changes in the web interface.

Although it is not necessary to run BibReformat to display formatted
records in the web interface, BibReformat allows to improve serving
speed by precreating the outputs. It is suggested to run
BibReformat for 'HB' output.

Option -m cannot be used at the same time as option -c.
Option -c prevents from finding records in private collections.

Examples:
  bibreformat                    Format all new or modified records (in HB).
  bibreformat -o HD              Format all new or modified records in HD.
  bibreformat -o HD,HB           Format all new or modified records in HD and HB.

  bibreformat -a                 Force reformatting all records (in HB).
  bibreformat -c 'Photos'        Force reformatting all records in 'Photos' collection (in HB).
  bibreformat -c 'Photos' -o HD  Force reformatting all records in 'Photos' collection in HD.

  bibreformat -i 15              Force reformatting record 15 (in HB).
  bibreformat -i 15:20           Force reformatting records 15 to 20 (in HB).
  bibreformat -i 15,16,17        Force reformatting records 15, 16 and 17 (in HB).

  bibreformat -n                 Show how many records are to be (re)formatted.
  bibreformat -n -c 'Articles'   Show how many records are to be (re)formatted in 'Articles' collection.

  bibreformat -oHB -s1h          Format all new and modified records every hour, in HB.
""", help_specific_usage="""  -o,  --formats         \t Specify output format/s (default HB)
  -n,  --noprocess      \t Count records to be formatted (no processing done)
Reformatting options:
  -a,  --all            \t Force reformatting all records
  -c,  --collection     \t Force reformatting records by collection
  -f,  --field          \t Force reformatting records by field
  -p,  --pattern        \t Force reformatting records by pattern
  -i,  --id             \t Force reformatting records by record id(s)
Pattern options:
  -m,  --matching       \t Specify if pattern is exact (e), regular expression (r),
                        \t partial (p), any of the words (o) or all of the words (a)
""",
              version=__revision__,
              specific_params=("ac:f:p:lo:nm:i:",
                               ["all",
                                "collection=",
                                "matching=",
                                "field=",
                                "pattern=",
                                "format=",
                                "noprocess",
                                "id="]),
              task_submit_check_options_fnc=task_submit_check_options,
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def task_submit_check_options():
    """Last checks and updating on the options: when the user gave no
    explicit selection criterion, default to reformatting records
    modified since the last run ('last') plus those missing a cache
    ('without')."""
    explicit_selection = (task_has_option('all')
                          or task_has_option('collection')
                          or task_has_option('field')
                          or task_has_option('pattern')
                          or task_has_option('matching')
                          or task_has_option('recids'))
    if not explicit_selection:
        task_set_option('without', 1)
        task_set_option('last', 1)
    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Elaborate specific CLI parameters of BibReformat.

    @param key: a parameter key to check
    @param value: a value associated to parameter X{Key}
    @return: True for known X{Key} else False.
    """
    if key in ("-a", "--all"):
        task_set_option("all", 1)
        task_set_option("without", 1)
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-n", "--noprocess"):
        task_set_option("noprocess", 1)
    elif key in ("-f", "--field"):
        task_set_option("field", value)
    elif key in ("-p", "--pattern"):
        task_set_option("pattern", value)
    elif key in ("-m", "--matching"):
        task_set_option("matching", value)
    elif key in ("-o", "--format"):
        input_formats = value.split(',')
        ## check the validity of the given output formats
        invalid_format = check_validity_input_formats(input_formats)
        if invalid_format:
            try:
                # Raise-and-catch so register_exception() has a live
                # exception context to record.
                raise Exception('Invalid output format.')
            except Exception:
                from invenio.ext.logging import register_exception
                register_exception(prefix="The given output format '%s' is not available or is invalid. Please try again" % invalid_format, alert_admin=True)
                # NOTE(review): bare return yields None (falsy), so task_init
                # appears to treat the parameter as unrecognized — confirm
                # this is the intended rejection path.
                return
        else: # every given format is available
            task_set_option("format", value)
    elif key in ("-i", "--id"):
        task_set_option("recids", value)
    else:
        return False
    return True
### okay, here we go:
# Script entry point: hand control to the BibSched task machinery.
if __name__ == '__main__':
    main()
diff --git a/invenio/legacy/bibformat/web/admin/bibformatadmin.py b/invenio/legacy/bibformat/web/admin/bibformatadmin.py
index 026eb6bf6..f97ab76d9 100644
--- a/invenio/legacy/bibformat/web/admin/bibformatadmin.py
+++ b/invenio/legacy/bibformat/web/admin/bibformatadmin.py
@@ -1,1139 +1,1139 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Invenio BibFormat Administrator Interface."""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
import MySQLdb
from invenio import bibformatadminlib
from invenio.modules.formatter import engine as bibformat_engine
import invenio.modules.formatter.api as bibformat_dblayer
from invenio.bibrankadminlib import check_user
from invenio.webpage import page, error_page
from invenio.webuser import getUid, page_not_authorized, collect_user_info
from invenio.base.i18n import wash_language, gettext_set_language
from invenio.utils.url import wash_url_argument, redirect_to_url
from invenio.search_engine import search_pattern, \
create_basic_search_units
from invenio.modules.formatter.config import InvenioBibFormatError, InvenioBibFormatWarning
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_SITE_LANG, CFG_SITE_NAME, CFG_SITE_SECURE_URL
def index(req, ln=CFG_SITE_LANG):
    """
    Main BibFormat administration page.

    Displays a warning if we find out that etc/bibformat dir is not writable
    by us (as most operations of BibFormat must write in this directory).

    @param req: the request object
    @param ln: language
    @return: a web page
    """
    warnings = []
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    if not bibformatadminlib.can_write_etc_bibformat_dir():
        try:
            # Raise-and-catch so register_exception() has an exception context.
            raise InvenioBibFormatWarning(_('Cannot write in etc/bibformat dir of your Invenio installation. Check directory permission.'))
        except InvenioBibFormatWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append(exc.message)
    # Check if user is authorized to administer
    # If not, still display page but offer to log in
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    # auth_code == 0 means the user passed the 'cfgbibformat' check.
    if not auth_code:
        is_admin = True
    else:
        is_admin = False
    navtrail = '''<a class="navtrail" href="%s/help/admin">%s</a>''' % \
                (CFG_SITE_SECURE_URL, _("Admin Area"))
    return page(title=_("BibFormat Admin"),
                body=bibformatadminlib.perform_request_index(ln=ln,
                                                             warnings=warnings,
                                                             is_admin=is_admin),
                language=ln,
                uid=uid,
                navtrail = navtrail,
                lastupdated=__lastupdated__,
                req=req)
def output_formats_manage(req, ln=CFG_SITE_LANG, sortby="code"):
    """
    Main page for output formats management. Check for authentication and print output formats list.

    @param req: the request object
    @param ln: language
    @param sortby: the sorting criteria (can be 'code' or 'name')
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail()
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    # auth_code == 0 means the user is authorized.
    if not auth_code:
        sortby = wash_url_argument(sortby, 'str')
        return page(title=_("Manage Output Formats"),
                    body=bibformatadminlib.perform_request_output_formats_management(ln=ln, sortby=sortby),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def output_format_show(req, bfo, ln=CFG_SITE_LANG,
                       r_fld=[], r_val=[], r_tpl=[],
                       default="", r_upd="", chosen_option="",
                       **args):
    """
    Show a single output format. Check for authentication and print output format settings.

    The page either shows the output format from file, or from user's
    POST session, as we want to let him edit the rules without
    saving. Policy is: r_fld, r_val, rules_tpl are list of attributes
    of the rules. If they are empty, load from file. Else use
    POST. The i th value of each list is one of the attributes of rule
    i. Rule i is the i th rule in order of evaluation. All list have
    the same number of item.

    r_upd contains an action that has to be performed on rules. It
    can composed of a number (i, the rule we want to modify) and an
    operator : "save" to save the rules, "add" or "del".
    syntax: operator [number]
    For eg: r_upd = _("Save Changes") saves all rules (no int should be specified).
    For eg: r_upd = _("Add New Rule") adds a rule (no int should be specified).
    For eg: r_upd = _("Remove Rule") + " 5" deletes rule at position 5.
    The number is used only for operation delete.

    An action can also be in **args. We must look there for string starting
    with '(+|-) [number]' to increase (+) or decrease (-) a rule given by its
    index (number).
    For example "+ 5" increase priority of rule 5 (put it at fourth position).
    The string in **args can be followed by some garbage that looks like .x
    or .y, as this is returned as the coordinate of the click on the
    <input type="image">. We HAVE to use args and reason on its keys, because for <input> of
    type image, iexplorer does not return the value of the tag, but only the name.

    Action is executed only if we are working from user's POST session
    (means we must have loaded the output format first, which is
    totally normal and expected behaviour)

    @param req: the request object
    @param bfo: the filename of the output format to show
    @param ln: language
    @param r_fld: the list of 'field' attribute for each rule
    @param r_val: the list of 'value' attribute for each rule
    @param r_tpl: the list of 'template' attribute for each rule
    @param default: the default format template used by this output format
    @param r_upd: the rule that we want to increase/decrease in order of evaluation
    @param chosen_option: empty string when user has not yet confirmed to go on
    @return: a web page
    """
    # NOTE(review): r_fld/r_val/r_tpl use mutable default arguments; they are
    # only read here, but the pattern is fragile.
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/output_formats_manage?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Output Formats")))
    code = wash_url_argument(bfo, 'str')
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        bfo = wash_url_argument(bfo, 'str')
        default = wash_url_argument(default, 'str')
        r_upd = wash_url_argument(r_upd, 'str')
        if not bibformatadminlib.can_read_output_format(bfo): # No read permission
            try:
                # NOTE(review): message string reads "cannot not be read" —
                # double negative in the translated source string.
                raise InvenioBibFormatError(_('Output format %s cannot not be read. %s') % (bfo, ""))
            except InvenioBibFormatError, exc:
                register_exception(req=req)
            return page(title=_("Restricted Output Format"),
                        body = """You don't have permission to
                        view this output format.""",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)
        output_format = bibformat_engine.get_output_format(code=bfo,
                                                           with_attributes=True)
        name = output_format['attrs']['names']['generic']
        if name == "":
            name = bfo
        # Read-only view: warn once (dialog), then let the user proceed.
        if not bibformatadminlib.can_write_output_format(bfo) and \
               chosen_option == "": # No write permission
            return dialog_box(req=req,
                              ln=ln,
                              title="File Permission on %s" % name,
                              message="You don't have write permission " \
                                      "on <i>%s</i>.<br/> You can view the output " \
                                      "format, but not edit it." % name,
                              navtrail=navtrail_previous_links,
                              options=[ _("Ok")])
        return page(title=_('Output Format %s Rules' % name),
                    body=bibformatadminlib.perform_request_output_format_show(bfo=bfo,
                                                                              ln=ln,
                                                                              r_fld=r_fld,
                                                                              r_val=r_val,
                                                                              r_tpl=r_tpl,
                                                                              default=default,
                                                                              r_upd=r_upd,
                                                                              args=args),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def output_format_show_attributes(req, bfo, ln=CFG_SITE_LANG):
    """
    Page for output format names and description attributes edition.

    @param req: the request object
    @param ln: language
    @param bfo: the filename of the template to show
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/output_formats_manage?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln , _("Manage Output Formats")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        bfo = wash_url_argument(bfo, 'str')
        if not bibformatadminlib.can_read_output_format(bfo): # No read permission
            try:
                raise InvenioBibFormatError(_('Output format %s cannot not be read. %s') % (bfo, ""))
            except InvenioBibFormatError, exc:
                register_exception(req=req)
            return page(title=_("Restricted Output Format"),
                        body = """You don't have permission to
                        view this output format.""",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)
        output_format = bibformat_engine.get_output_format(code=bfo,
                                                           with_attributes=True)
        name = output_format['attrs']['names']['generic']
        return page(title=_("Output Format %s Attributes" % name),
                    body=bibformatadminlib.perform_request_output_format_show_attributes(bfo, ln=ln),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links ,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req, text=auth_msg)
def output_format_show_dependencies(req, bfo, ln=CFG_SITE_LANG):
    """
    Show the dependencies of the given output format.

    @param req: the request object
    @param ln: language
    @param bfo: the filename of the output format to show
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_templates_manage?ln=%s">%s </a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Output Formats")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        bfo = wash_url_argument(bfo, 'str')
        if not bibformatadminlib.can_read_output_format(bfo): # No read permission
            try:
                raise InvenioBibFormatError(_('Output format %s cannot not be read. %s') % (bfo, ""))
            except InvenioBibFormatError, exc:
                register_exception(req=req)
            return page(title=_("Restricted Output Format"),
                        body = """You don't have permission
                        to view this output format.""",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)
        format_name = bibformat_engine.get_output_format_attrs(bfo)['names']['generic']
        return page(title=_("Output Format %s Dependencies" % format_name),
                    body=bibformatadminlib.perform_request_output_format_show_dependencies(bfo, ln=ln),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req, text=auth_msg)
def output_format_update_attributes(req, bfo, ln=CFG_SITE_LANG,
                                    name = "", description="",
                                    code="", content_type="",
                                    names_trans=[], visibility="0"):
    """
    Update the name, description and code of given output format.

    @param req: the request object
    @param ln: language
    @param description: the new description
    @param name: the new name
    @param code: the new short code (== new bfo) of the output format
    @param content_type: the new content_type of the output format
    @param bfo: the filename of the output format to update
    @param names_trans: the translations in the same order as the languages from get_languages()
    @param visibility: the visibility of the output format in the output formats list (public pages)
    @return: a web page (or redirection to a web page)
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        name = wash_url_argument(name, 'str')
        description = wash_url_argument(description, 'str')
        bfo = wash_url_argument(bfo, 'str')
        code = wash_url_argument(code, 'str')
        visibility = wash_url_argument(visibility, 'int')
        # update_output_format_attributes() returns the (possibly renamed) code.
        bfo = bibformatadminlib.update_output_format_attributes(bfo,
                                                                name,
                                                                description,
                                                                code,
                                                                content_type,
                                                                names_trans,
                                                                visibility)
        # NOTE(review): 'names_trans' is present in the dict but unused by the
        # format string below.
        redirect_to_url(req, "output_format_show?ln=%(ln)s&bfo=%(bfo)s" % {'ln':ln,
                                                                           'bfo':bfo,
                                                                           'names_trans':names_trans})
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg)
def output_format_delete(req, bfo, ln=CFG_SITE_LANG, chosen_option=""):
    """
    Delete an output format.

    @param req: the request object
    @param bfo: the filename of the output format to delete
    @param ln: language
    @param chosen_option: empty string when user has not yet confirmed, else "Delete" to apply
    @return: a web page (or redirection to a web page)
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/output_formats_manage?ln=%s">%s</a> &gt; %s''' % (CFG_SITE_SECURE_URL, ln, _("Manage Output Formats"), _("Delete Output Format")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        # Ask confirmation to user if not already done
        chosen_option = wash_url_argument(chosen_option, 'str')
        if chosen_option == "":
            bfo = wash_url_argument(bfo, 'str')
            format_name = bibformat_dblayer.get_output_format_names(bfo)['generic']
            return dialog_box(req=req,
                              ln=ln,
                              title="Delete %s" % format_name,
                              # BUGFIX: the two adjacent string literals used to
                              # concatenate without a space ("...want todelete...").
                              message="Are you sure you want to " \
                                      "delete output format <i>%s</i>?" % format_name,
                              navtrail=navtrail_previous_links,
                              options=[_("Cancel"), _("Delete")])
        elif chosen_option == _("Delete"):
            bibformatadminlib.delete_output_format(bfo)
            redirect_to_url(req, "output_formats_manage?ln=%(ln)s" % {'ln':ln})
    else:
        return page_not_authorized(req=req, text=auth_msg)
def output_format_add(req, ln=CFG_SITE_LANG):
    """
    Add a new output format.

    @param req: the request object
    @param ln: language
    @return: a web page (or redirection to a web page)
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        bfo = bibformatadminlib.add_output_format()
        # PEP 8: identity comparison with None ('is None', not '== None').
        # add_output_format() signals failure by returning None.
        if bfo is None:
            return page(title=_("Cannot create output format"),
                        body = """BibFormat cannot add an output format.
                        Check output formats directory permissions.""",
                        language=ln,
                        lastupdated=__lastupdated__,
                        req=req)
        redirect_to_url(req, "output_format_show_attributes?ln=%(ln)s&bfo=%(bfo)s" % {'ln':ln, 'bfo':bfo})
    else:
        return page_not_authorized(req=req, text=auth_msg)
def format_templates_manage(req, ln=CFG_SITE_LANG, checking='0'):
    """
    Main page for formats templates management. Check for authentication and print formats list.

    @param req: the request object
    @param ln: language
    @param checking: if 0, basic checking. Else perform extensive checking (time-consuming)
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail()
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        checking_level = wash_url_argument(checking, 'int')
        return page(title=_("Manage Format Templates"),
                    body=bibformatadminlib.perform_request_format_templates_management(ln=ln, checking=checking_level),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def format_template_show(req, bft, code=None, ln=CFG_SITE_LANG,
                         ln_for_preview=CFG_SITE_LANG,
                         pattern_for_preview="",
                         content_type_for_preview="text/html",
                         chosen_option=""):
    """
    Main page for template edition. Check for authentication and print formats editor.

    @param req: the request object
    @param ln: language
    @param code: the code being edited
    @param bft: the name of the template to show
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: the (MIME) content type of the preview
    @param chosen_option: returned value for dialog_box warning
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail('''
    &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_templates_manage?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln , _("Manage Format Templates")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        format_template = wash_url_argument(bft, 'str')
        ln_preview = wash_language(ln_for_preview)
        pattern_preview = wash_url_argument(pattern_for_preview, 'str')
        if not bibformatadminlib.can_read_format_template(bft): # No read permission
            try:
                raise InvenioBibFormatError(_('Format template %s cannot not be read. %s') % (format_template, ""))
            except InvenioBibFormatError, exc:
                register_exception(req=req)
            return page(title=_("Restricted Format Template"),
                        body = """You don't have permission
                        to view this format template.""",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)
        format_name = bibformat_engine.get_format_template_attrs(bft)['name']
        # Read-only view: warn once (dialog), then let the user proceed.
        if not bibformatadminlib.can_write_format_template(bft) and \
               chosen_option == "": # No write permission
            return dialog_box(req=req,
                              ln=ln,
                              title="File Permission on %s" % format_name,
                              message="You don't have write permission " \
                                      "on <i>%s</i>.<br/> You can view the template" \
                                      ", but not edit it." % format_name,
                              navtrail=navtrail_previous_links,
                              options=[ _("Ok")])
        if bft.endswith('.xsl'):
            format_name += ' (XSL)'
        return page(title=_("Format Template %s"%format_name),
                    body=bibformatadminlib.perform_request_format_template_show(format_template,
                                                                                code=code,
                                                                                ln=ln,
                                                                                ln_for_preview=ln_preview,
                                                                                pattern_for_preview=pattern_preview,
                                                                                content_type_for_preview=content_type_for_preview),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def format_template_show_attributes(req, bft, ln=CFG_SITE_LANG, new=0):
    """
    Page for template name and description attributes edition.

    This is also the first page shown when a format template
    has just been added. In that case new is different from
    False and we can offer specific option to user (for ex
    let him make a duplicate of existing template).

    @param req: the request object
    @param ln: language
    @param bft: the name of the template to show
    @param new: if "False", the template has not just been added
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_templates_manage?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Format Templates")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        format_template = wash_url_argument(bft, 'str')
        format_name = bibformat_engine.get_format_template_attrs(bft)['name']
        is_new = wash_url_argument(new, 'int')
        if not bibformatadminlib.can_read_format_template(bft): # No read permission
            try:
                raise InvenioBibFormatError(_('Format template %s cannot not be read. %s') % (format_template, ""))
            except InvenioBibFormatError, exc:
                register_exception(req=req)
            return page(title=_("Restricted Format Template"),
                        body = """You don't have permission
                        to view this format template.""",
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)
        return page(title=_("Format Template %s Attributes"%format_name),
                    body=bibformatadminlib.perform_request_format_template_show_attributes(bft, ln=ln, new=is_new),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links ,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        return page_not_authorized(req=req, text=auth_msg)
def format_template_show_dependencies(req, bft, ln=CFG_SITE_LANG):
    """
    Show the dependencies (on elements) of the given format template.

    @param req: the request object
    @param ln: language
    @param bft: the filename of the template to show
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_templates_manage?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Format Templates")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # User may not configure BibFormat
        return page_not_authorized(req=req, text=auth_msg)
    template_filename = wash_url_argument(bft, 'str')
    format_name = bibformat_engine.get_format_template_attrs(bft)['name']
    return page(title=_("Format Template %s Dependencies" % format_name),
                body=bibformatadminlib.perform_request_format_template_show_dependencies(bft, ln=ln),
                uid=uid,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__,
                req=req)
def format_template_update_attributes(req, bft, ln=CFG_SITE_LANG,
                                      name = "", description="",
                                      duplicate=None):
    """
    Update the name and description of the given format template, then
    redirect to the template editor page.

    @param req: the request object
    @param ln: language
    @param description: the new description
    @param name: the new name
    @param bft: the filename of the template to update
    @param duplicate: the filename of the template whose code we want to copy
    @return: a web page (or redirection to a web page)
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # Not allowed to configure BibFormat
        return page_not_authorized(req=req, text=auth_msg)
    if duplicate is not None:
        duplicate = wash_url_argument(duplicate, 'str')
    washed_name = wash_url_argument(name, 'str')
    washed_description = wash_url_argument(description, 'str')
    # update_format_template_attributes returns the (possibly renamed) filename
    bft = bibformatadminlib.update_format_template_attributes(bft,
                                                              washed_name,
                                                              washed_description,
                                                              duplicate)
    redirect_to_url(req, "format_template_show?ln=%(ln)s&bft=%(bft)s" % {'ln':ln, 'bft':bft})
def format_template_delete(req, bft, ln=CFG_SITE_LANG, chosen_option=""):
    """
    Delete a format template.

    The user is first shown a confirmation dialog; the deletion only
    happens once the dialog comes back with the localized "Delete"
    label in 'chosen_option'. Any other answer (e.g. "Cancel") simply
    redirects back to the templates management page.

    @param req: the request object
    @param bft: the filename of the template to delete
    @param ln: language
    @param chosen_option: empty string when user has not yet confirmed.
        Else "Delete" to confirm
    @return: a web page (or redirection to a web page)
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail('''
 &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_templates_manage?ln=%s">%s</a> &gt; %s''' % (CFG_SITE_SECURE_URL, ln ,_("Manage Format Templates"),_("Delete Format Template")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        #Ask confirmation to user if not already done
        chosen_option = wash_url_argument(chosen_option, 'str')
        if chosen_option == "":
            format_template = wash_url_argument(bft, 'str')
            format_name = bibformat_engine.get_format_template_attrs(bft)['name']
            return dialog_box(req=req,
                              ln=ln,
                              title="Delete %s" % format_name,
                              # BUGFIX: the concatenated fragments were missing
                              # a separating space ("...deleteformat template...")
                              message="Are you sure you want to delete " \
                                      "format template <i>%s</i>?" % format_name,
                              navtrail=navtrail_previous_links,
                              options=[_("Cancel"), _("Delete")])
        elif chosen_option==_("Delete"):
            bibformatadminlib.delete_format_template(bft)
        redirect_to_url(req, "format_templates_manage?ln=%(ln)s" % {'ln':ln})
    else:
        return page_not_authorized(req=req, text=auth_msg)
def format_template_add(req, ln=CFG_SITE_LANG):
    """
    Add a new format template and redirect to its attributes page.

    @param req: the request object
    @param ln: language
    @return: a web page (or redirection to a web page)
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # User is not allowed to configure BibFormat
        return page_not_authorized(req=req, text=auth_msg)
    created_template = bibformatadminlib.add_format_template()
    # 'new=1' lets the attributes page offer new-template-specific options
    redirect_to_url(req, "format_template_show_attributes?ln=%(ln)s&bft=%(bft)s&new=1" % {'ln':ln, 'bft':created_template})
def format_template_show_preview_or_save(req, bft, ln=CFG_SITE_LANG, code=None,
                                         ln_for_preview=CFG_SITE_LANG,
                                         pattern_for_preview="",
                                         content_type_for_preview='text/html',
                                         save_action=None,
                                         navtrail=""):
    """
    Print the preview of a record with a format template. To be included
    inside the format template editor. If 'save_action' has a value, the
    code is also saved at the same time.

    @param req: the request object
    @param code: the code of a template to use for formatting
    @param ln: language
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: the content-type to use to serve the preview page
    @param save_action: has a value if the code has to be saved
    @param bft: the filename of the template to save
    @param navtrail: navigation trail
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        user_info = collect_user_info(req)
        uid = user_info['uid']
        bft = wash_url_argument(bft, 'str')
        if save_action is not None and code is not None:
            #save
            bibformatadminlib.update_format_template_code(bft, code=code)
            # Drop cached templates so the saved code is used immediately
            bibformat_engine.clear_caches()
        if code is None:
            # No code submitted: preview the template as stored on disk
            code = bibformat_engine.get_format_template(bft)['code']
        ln_for_preview = wash_language(ln_for_preview)
        pattern_for_preview = wash_url_argument(pattern_for_preview, 'str')
        if pattern_for_preview == "":
            # No pattern given: pick any non-deleted record for the preview
            try:
                recID = search_pattern(p='-collection:DELETED').pop()
            except KeyError:
                # Empty result set: .pop() on an empty intbitset raises KeyError
                return page(title="No Document Found",
                            body="",
                            uid=uid,
                            language=ln_for_preview,
                            navtrail = "",
                            lastupdated=__lastupdated__,
                            req=req,
                            navmenuid='search')
            pattern_for_preview = "recid:%s" % recID
        else:
            try:
                recID = search_pattern(p=pattern_for_preview + \
                                       ' -collection:DELETED').pop()
            except KeyError:
                return page(title="No Record Found for %s" % pattern_for_preview,
                            body="",
                            uid=uid,
                            language=ln_for_preview,
                            navtrail = "",
                            lastupdated=__lastupdated__,
                            req=req)
        # Extract the positive search terms to highlight in the preview
        units = create_basic_search_units(None, pattern_for_preview, None)
        keywords = [unit[1] for unit in units if unit[0] != '-']
        bfo = bibformat_engine.BibFormatObject(recID = recID,
                                               ln = ln_for_preview,
                                               search_pattern = keywords,
                                               xml_record = None,
                                               user_info = user_info)
        # verbose=7 surfaces formatting errors inline in the preview
        body = bibformat_engine.format_with_format_template(bft,
                                                            bfo,
                                                            verbose=7,
                                                            format_template_code=code)
        if content_type_for_preview == 'text/html':
            #Standard page display with CDS headers, etc.
            return page(title="",
                        body=body,
                        uid=uid,
                        language=ln_for_preview,
                        navtrail = navtrail,
                        lastupdated=__lastupdated__,
                        req=req,
                        navmenuid='search')
        else:
            #Output with chosen content-type.
            req.content_type = content_type_for_preview
            req.send_http_header()
            req.write(body)
    else:
        return page_not_authorized(req=req, text=auth_msg)
def format_template_show_short_doc(req, ln=CFG_SITE_LANG, search_doc_pattern=""):
    """
    Print the format elements documentation in a brief way, for
    inclusion inside the format template editor.

    @param req: the request object
    @param ln: language
    @param search_doc_pattern: a search pattern that specifies which elements to display
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # Unauthorized for BibFormat configuration
        return page_not_authorized(req=req, text=auth_msg)
    washed_pattern = wash_url_argument(search_doc_pattern, 'str')
    return bibformatadminlib.perform_request_format_template_show_short_doc(ln=ln, search_doc_pattern=washed_pattern)
def format_elements_doc(req, ln=CFG_SITE_LANG):
    """
    Main page for format elements documentation: check authentication
    and print the list of format elements.

    @param req: the request object
    @param ln: language
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail()
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # Unauthorized: show the standard "not authorized" page
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
    return page(title=_("Format Elements Documentation"),
                body=bibformatadminlib.perform_request_format_elements_documentation(ln=ln),
                uid=uid,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__,
                req=req)
def format_element_show_dependencies(req, bfe, ln=CFG_SITE_LANG):
    """
    Show the dependencies of the given format element.

    @param req: the request object
    @param bfe: the name of the bfe to show
    @param ln: language
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_elements_doc?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln , _("Format Elements Documentation")))
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # No BibFormat configuration rights
        return page_not_authorized(req=req, text=auth_msg, navtrail=navtrail_previous_links)
    element_name = wash_url_argument(bfe, 'str')
    return page(title=_("Format Element %s Dependencies" % element_name),
                body=bibformatadminlib.perform_request_format_element_show_dependencies(bfe=element_name, ln=ln),
                uid=uid,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__,
                req=req)
def format_element_test(req, bfe, ln=CFG_SITE_LANG, param_values=None):
    """
    Let the user test a format element with different parameters and
    check its output.

    'param_values' is the list of values to pass to the element's
    'format' function as parameters, in order. If it is None, the user
    has not defined them yet.

    @param req: the request object
    @param bfe: the name of the element to test
    @param ln: language
    @param param_values: the list of parameters to pass to element format function
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_elements_doc?ln=%s">%s</a>''' %( CFG_SITE_SECURE_URL, ln , _("Format Elements Documentation")))
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # Unauthorized for BibFormat configuration
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
    element_name = wash_url_argument(bfe, 'str')
    user_info = collect_user_info(req)
    uid = user_info['uid']
    return page(title=_("Test Format Element %s" % element_name),
                body=bibformatadminlib.perform_request_format_element_test(bfe=element_name,
                                                                           ln=ln,
                                                                           param_values=param_values,
                                                                           user_info=user_info),
                uid=uid,
                language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__,
                req=req)
def validate_format(req, ln=CFG_SITE_LANG, bfo=None, bft=None, bfe=None):
    """
    Returns a page showing the status of an output format or format
    template or format element. This page is called from output
    formats management page or format template management page or
    format elements documentation.

    The page only shows the status of one of the formats, depending on
    the specified one. If multiple are specified, shows the first one
    (precedence: bfo, then bft, then bfe).

    @param req: the request object
    @param ln: language
    @param bfo: an output format 6 chars code
    @param bft: a format template filename
    @param bfe: a format element name
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        if bfo is not None: #Output format validation
            bfo = wash_url_argument(bfo, 'str')
            navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/output_formats_manage?ln=%s">%s</a>'''%(CFG_SITE_SECURE_URL, ln, _("Manage Output Formats")))
            if not bibformatadminlib.can_read_output_format(bfo): #No read permission
                try:
                    # NOTE(review): the msgid contains a double negative
                    # ("cannot not be read"); fixing it would break existing
                    # gettext translations — confirm before changing.
                    raise InvenioBibFormatError(_('Output format %s cannot not be read. %s') % (bfo, ""))
                except InvenioBibFormatError, exc:
                    register_exception(req=req)
                return page(title=_("Restricted Output Format"),
                            body = """You don't have permission
                            to view this output format.""",
                            language=ln,
                            navtrail = navtrail_previous_links,
                            lastupdated=__lastupdated__,
                            req=req)
            output_format = bibformat_engine.get_output_format(code=bfo,
                                                               with_attributes=True)
            name = output_format['attrs']['names']['generic']
            title = _("Validation of Output Format %s" % name)
        elif bft is not None: #Format template validation
            bft = wash_url_argument(bft, 'str')
            navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_templates_manage?ln=%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln, _("Manage Format Templates")))
            if not bibformatadminlib.can_read_format_template(bft): #No read permission
                try:
                    raise InvenioBibFormatError(_('Format template %s cannot not be read. %s') % (bft, ""))
                except InvenioBibFormatError, exc:
                    register_exception(req=req)
                return page(title=_("Restricted Format Template"),
                            body = """You don't have permission to
                            view this format template.""",
                            language=ln,
                            navtrail = navtrail_previous_links,
                            lastupdated=__lastupdated__,
                            req=req)
            name = bibformat_engine.get_format_template_attrs(bft)['name']
            title = _("Validation of Format Template %s" % name)
        elif bfe is not None: #Format element validation
            bfe = wash_url_argument(bfe, 'str')
            navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/format_elements_doc?ln=%s#%s">%s</a>''' % (CFG_SITE_SECURE_URL, ln , bfe.upper() , _("Format Elements Documentation")))
            # Elements backed by a plain MARC tag (tag_exists_for_name) are
            # always readable, hence the second condition.
            if not bibformatadminlib.can_read_format_element(bfe) and \
                   not bibformat_dblayer.tag_exists_for_name(bfe): #No read permission
                try:
                    raise InvenioBibFormatError(_('Format element %s cannot not be read. %s') % (bfe, ""))
                except InvenioBibFormatError, exc:
                    register_exception(req=req)
                return page(title=_("Restricted Format Element"),
                            body = """You don't have permission
                            to view this format element.""",
                            language=ln,
                            navtrail = navtrail_previous_links,
                            lastupdated=__lastupdated__,
                            req=req)
            title = _("Validation of Format Element %s" % bfe)
        else: #No format specified
            try:
                raise InvenioBibFormatError(_('No format specified for validation. Please specify one.'))
            except InvenioBibFormatError, exc:
                register_exception(req=req)
            # NOTE(review): navtrail_previous_links is referenced here but is
            # only assigned in the branches above — with no format specified
            # this looks like it would raise NameError; confirm.
            return page(title=_("Format Validation"),
                        body="No format has been specified.",
                        uid=uid,
                        language=ln,
                        navtrail = navtrail_previous_links,
                        lastupdated=__lastupdated__,
                        req=req)
        return page(title=title,
                    body=bibformatadminlib.perform_request_format_validate(ln=ln,
                                                                           bfo=bfo,
                                                                           bft=bft,
                                                                           bfe=bfe),
                    uid=uid,
                    language=ln,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__,
                    req=req)
    else:
        # NOTE(review): this navtrail anchor tag is unterminated (missing
        # the closing '">label</a>') — confirm and fix the markup.
        navtrail_previous_links = bibformatadminlib.getnavtrail(''' &gt; <a class="navtrail" href="%s/admin/bibformat/bibformatadmin.py/?ln=%s'''%(CFG_SITE_SECURE_URL, ln))
        return page_not_authorized(req=req,
                                   text=auth_msg,
                                   navtrail=navtrail_previous_links)
def download_dreamweaver_floater(req):
    """
    Trigger the download of a BibFormat palette for Dreamweaver.

    @param req: the request object
    @return: the palette code to be used within Dreamweaver
    """
    # Serve the palette as an HTML attachment
    req.content_type = 'text/html'
    req.headers_out["Content-Disposition"] = "attachment; filename=BibFormat_floater.html"
    req.send_http_header()
    palette = bibformatadminlib.perform_request_dreamweaver_floater()
    req.write(palette)
def dialog_box(req, url="", ln=CFG_SITE_LANG, navtrail="",
               title="", message="", options=None):
    """
    Returns a dialog box with a given title, message and options.
    Used for asking confirmation on actions.

    The page that will receive the result must take 'chosen_option' as
    parameter.

    @param req: the request object
    @param url: the url used to submit the options chosen by the user
    @param ln: language
    @param navtrail: navigation trail
    @param title: title of the page/dialog
    @param message: message to display in the dialog box
    @param options: the list of labels for the buttons given as choice to user
    @return: a dialog page
    """
    import invenio
    if options is None:
        # BUGFIX: was a mutable default argument (options=[]); use a fresh
        # list per call instead of a shared one.
        options = []
    bibformat_templates = invenio.template.load('bibformat')
    return page(title="",
                body = bibformat_templates.tmpl_admin_dialog_box(url,
                                                                 ln,
                                                                 title,
                                                                 message,
                                                                 options),
                language=ln,
                lastupdated=__lastupdated__,
                navtrail=navtrail,
                req=req)
diff --git a/invenio/legacy/bibindex/engine.py b/invenio/legacy/bibindex/engine.py
index 7891ff41d..9990a71cf 100644
--- a/invenio/legacy/bibindex/engine.py
+++ b/invenio/legacy/bibindex/engine.py
@@ -1,1984 +1,1984 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibIndex indexing engine implementation. See bibindex executable for entry point.
"""
__revision__ = "$Id$"
import re
import sys
import time
import fnmatch
from datetime import datetime
from time import strptime
from invenio.config import CFG_SOLR_URL
from invenio.bibindex_engine_config import CFG_MAX_MYSQL_THREADS, \
CFG_MYSQL_THREAD_TIMEOUT, \
CFG_CHECK_MYSQL_THREADS, \
CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR, \
CFG_BIBINDEX_INDEX_TABLE_TYPE, \
CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR, \
CFG_BIBINDEX_UPDATE_MESSAGE
from invenio.bibauthority_config import \
CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC, \
CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
from invenio.bibauthority_engine import get_index_strings_by_control_no,\
get_control_nos_from_recID
from invenio.bibindexadminlib import get_idx_remove_html_markup, \
get_idx_remove_latex_markup, \
get_idx_remove_stopwords
from invenio.bibdocfile import BibRecDocs
from invenio.search_engine import perform_request_search, \
get_index_stemming_language, \
get_synonym_terms, \
search_pattern, \
search_unit_in_bibrec
from invenio.dbquery import run_sql, DatabaseError, serialize_via_marshal, \
deserialize_via_marshal, wash_table_column_name
from invenio.bibindex_engine_washer import wash_index_term
from invenio.bibtask import task_init, write_message, get_datetime, \
task_set_option, task_get_option, task_get_task_param, \
task_update_progress, task_sleep_now_if_required
from invenio.intbitset import intbitset
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.bibrankadminlib import get_def_name
from invenio.solrutils_bibindex_indexer import solr_commit
from invenio.bibindex_tokenizers.BibIndexJournalTokenizer import \
CFG_JOURNAL_TAG, \
CFG_JOURNAL_PUBINFO_STANDARD_FORM, \
CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK
from invenio.bibindex_engine_utils import load_tokenizers, \
get_all_index_names_and_column_values, \
get_idx_indexer, \
get_index_tags, \
get_field_tags, \
get_tag_indexes, \
get_all_indexes, \
get_all_virtual_indexes, \
get_index_virtual_indexes, \
is_index_virtual, \
get_virtual_index_building_blocks, \
get_index_id_from_index_name, \
get_index_name_from_index_id, \
run_sql_drop_silently, \
get_min_last_updated, \
remove_inexistent_indexes
from invenio.search_engine_utils import get_fieldvalues
from invenio.bibfield import get_record
from invenio.memoiseutils import Memoise
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
## precompile some often-used regexp for speed reasons:
re_subfields = re.compile('\$\$\w')  # MARC subfield markers such as "$$a"
re_datetime_shift = re.compile("([-\+]{0,1})([\d]+)([dhms])")  # relative date shifts, e.g. "+2d"
nb_char_in_line = 50 # for verbose pretty printing
chunksize = 1000 # default size of chunks that the records will be treated by
base_process_size = 4500 # process base size
_last_word_table = None  # NOTE(review): presumably the word table currently being processed; set elsewhere in this module
_TOKENIZERS = load_tokenizers()  # name -> tokenizer class mapping used by get_index_tokenizer()
def list_union(list1, list2):
    "Returns union of the two lists."
    # Dict keys are unique, so merging both lists as keys yields the union.
    merged = dict.fromkeys(list1)
    merged.update(dict.fromkeys(list2))
    return merged.keys()
def list_unique(_list):
    """Returns a _list with duplicates removed."""
    # Keys of a dict built from the list are the unique elements.
    return dict.fromkeys(_list).keys()
## safety function for killing slow DB threads:
def kill_sleepy_mysql_threads(max_threads=CFG_MAX_MYSQL_THREADS, thread_timeout=CFG_MYSQL_THREAD_TIMEOUT):
    """Check the number of DB threads and if there are more than
    MAX_THREADS of them, kill all threads that are in a sleeping
    state for more than THREAD_TIMEOUT seconds. (This is useful
    for working around the max_connection problem that appears
    during indexation in some not-yet-understood cases.) If some
    threads are to be killed, write info into the log file.

    @param max_threads: thread count above which the cleanup kicks in
    @param thread_timeout: seconds a thread may sleep before being killed
    """
    res = run_sql("SHOW FULL PROCESSLIST")
    if len(res) > max_threads:
        for row in res:
            # PROCESSLIST columns: Id, User, Host, db, Command, Time, State, Info
            r_id, dummy, dummy, dummy, r_command, r_time, dummy, dummy = row
            if r_command == "Sleep" and int(r_time) > thread_timeout:
                run_sql("KILL %s", (r_id,))
                write_message("WARNING: too many DB threads, killing thread %s" % r_id, verbose=1)
    return
def get_associated_subfield_value(recID, tag, value, associated_subfield_code):
    """Return list of ASSOCIATED_SUBFIELD_CODE, if exists, for record
    RECID and TAG of value VALUE. Used by fulltext indexer only.
    Note: TAG must be 6 characters long (tag+ind1+ind2+sfcode),
    otherwise an empty string is returned.
    FIXME: what if many tag values have the same value but different
    associated_subfield_code? Better use bibrecord library for this.
    """
    out = ""
    if len(tag) != 6:
        return out
    # MARC metadata tables are sharded by the first two tag digits
    bibXXx = "bib" + tag[0] + tag[1] + "x"
    bibrec_bibXXx = "bibrec_" + bibXXx
    # NOTE(review): after the %-interpolation the clause reads
    # "tag LIKE %s%" with a trailing '%' — confirm run_sql handles this
    # pattern as intended.
    query = """SELECT bb.field_number, b.tag, b.value FROM %s AS b, %s AS bb
                WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id AND tag LIKE
                %%s%%""" % (bibXXx, bibrec_bibXXx)
    res = run_sql(query, (recID, tag[:-1]))
    # First pass: locate the field occurrence holding the wanted value
    field_number = -1
    for row in res:
        if row[1] == tag and row[2] == value:
            field_number = row[0]
    if field_number > 0:
        # Second pass: within that occurrence, read the associated subfield
        for row in res:
            if row[0] == field_number and row[1] == tag[:-1] + associated_subfield_code:
                out = row[2]
                break
    return out
def get_author_canonical_ids_for_recid(recID):
    """
    Return list of author canonical IDs (e.g. `J.Ellis.1') for the
    given record. Done by consulting BibAuthorID module.
    """
    from invenio.bibauthorid_dbinterface import get_persons_from_recids
    canonical_ids = []
    res = get_persons_from_recids([recID])
    if res is None:
        ## BibAuthorID is not enabled
        return canonical_ids
    dummy, person_infos = res
    for person_data in person_infos.values():
        canonical_id = person_data.get('canonical_id', '')
        if canonical_id:
            canonical_ids.append(canonical_id)
    return canonical_ids
def swap_temporary_reindex_tables(index_id, reindex_prefix="tmp_"):
    """Atomically swap reindexed temporary table with the original one.
    Delete the now-old one.

    For a virtual index the six temporary tables are simply dropped;
    for a regular index the WORD/PAIR/PHRASE forward and reverse tables
    are swapped with their "tmp_" counterparts in a single RENAME TABLE
    statement, then the old tables are removed.

    @param index_id: id of the index whose tables are swapped
    @param reindex_prefix: name prefix of the temporary tables
    """
    is_virtual = is_index_virtual(index_id)
    if is_virtual:
        write_message("Removing %s index tables for id %s" % (reindex_prefix, index_id))
        # First interpolation fills the table numbers, second the prefix
        query = """DROP TABLE IF EXISTS %%sidxWORD%02dR, %%sidxWORD%02dF,
                                        %%sidxPAIR%02dR, %%sidxPAIR%02dF,
                                        %%sidxPHRASE%02dR, %%sidxPHRASE%02dF
                """ % ((index_id,)*6)
        query = query % ((reindex_prefix,)*6)
        run_sql(query)
    else:
        write_message("Putting new tmp index tables for id %s into production" % index_id)
        # One RENAME TABLE statement so the swap is atomic for readers
        run_sql(
            "RENAME TABLE " +
            "idxWORD%02dR TO old_idxWORD%02dR," % (index_id, index_id) +
            "%sidxWORD%02dR TO idxWORD%02dR," % (reindex_prefix, index_id, index_id) +
            "idxWORD%02dF TO old_idxWORD%02dF," % (index_id, index_id) +
            "%sidxWORD%02dF TO idxWORD%02dF," % (reindex_prefix, index_id, index_id) +
            "idxPAIR%02dR TO old_idxPAIR%02dR," % (index_id, index_id) +
            "%sidxPAIR%02dR TO idxPAIR%02dR," % (reindex_prefix, index_id, index_id) +
            "idxPAIR%02dF TO old_idxPAIR%02dF," % (index_id, index_id) +
            "%sidxPAIR%02dF TO idxPAIR%02dF," % (reindex_prefix, index_id, index_id) +
            "idxPHRASE%02dR TO old_idxPHRASE%02dR," % (index_id, index_id) +
            "%sidxPHRASE%02dR TO idxPHRASE%02dR," % (reindex_prefix, index_id, index_id) +
            "idxPHRASE%02dF TO old_idxPHRASE%02dF," % (index_id, index_id) +
            "%sidxPHRASE%02dF TO idxPHRASE%02dF;" % (reindex_prefix, index_id, index_id)
        )
        write_message("Dropping old index tables for id %s" % index_id)
        run_sql_drop_silently("DROP TABLE old_idxWORD%02dR, old_idxWORD%02dF, old_idxPAIR%02dR, old_idxPAIR%02dF, old_idxPHRASE%02dR, old_idxPHRASE%02dF" % (index_id, index_id, index_id, index_id, index_id, index_id)) # kwalitee: disable=sql
def init_temporary_reindex_tables(index_id, reindex_prefix="tmp_"):
    """Create reindexing temporary tables.

    Creates prefixed copies of the six index tables (WORD/PAIR/PHRASE,
    each with a forward "F" table keyed by term and a reverse "R" table
    keyed by record id), dropping any leftovers first.

    @param index_id: id of the index to create temporary tables for
    @param reindex_prefix: name prefix for the temporary tables
    """
    write_message("Creating new tmp index tables for id %s" % index_id)
    # WORD tables: terms up to 50 chars
    run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxWORD%02dF""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
    run_sql("""CREATE TABLE %sidxWORD%02dF (
                id mediumint(9) unsigned NOT NULL auto_increment,
                term varchar(50) default NULL,
                hitlist longblob,
                PRIMARY KEY (id),
                UNIQUE KEY term (term)
            ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
    run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxWORD%02dR""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
    run_sql("""CREATE TABLE %sidxWORD%02dR (
                id_bibrec mediumint(9) unsigned NOT NULL,
                termlist longblob,
                type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                PRIMARY KEY (id_bibrec,type)
            ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
    # PAIR tables: word pairs, hence the longer varchar(100) term column
    run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPAIR%02dF""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
    run_sql("""CREATE TABLE %sidxPAIR%02dF (
                id mediumint(9) unsigned NOT NULL auto_increment,
                term varchar(100) default NULL,
                hitlist longblob,
                PRIMARY KEY (id),
                UNIQUE KEY term (term)
            ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
    run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPAIR%02dR""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
    run_sql("""CREATE TABLE %sidxPAIR%02dR (
                id_bibrec mediumint(9) unsigned NOT NULL,
                termlist longblob,
                type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                PRIMARY KEY (id_bibrec,type)
            ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
    # PHRASE tables: arbitrary-length terms, so TEXT with a prefix index
    run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPHRASE%02dF""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
    run_sql("""CREATE TABLE %sidxPHRASE%02dF (
                id mediumint(9) unsigned NOT NULL auto_increment,
                term text default NULL,
                hitlist longblob,
                PRIMARY KEY (id),
                KEY term (term(50))
            ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
    run_sql_drop_silently("""DROP TABLE IF EXISTS %sidxPHRASE%02dR""" % (wash_table_column_name(reindex_prefix), index_id)) # kwalitee: disable=sql
    run_sql("""CREATE TABLE %sidxPHRASE%02dR (
                id_bibrec mediumint(9) unsigned NOT NULL default '0',
                termlist longblob,
                type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT',
                PRIMARY KEY (id_bibrec,type)
            ) ENGINE=MyISAM""" % (reindex_prefix, index_id))
def remove_subfields(s):
    """Replace MARC subfield markers ('$$' plus a code character) in s
    with a single space."""
    return re_subfields.sub(' ', s)
def get_field_indexes(field):
    """Return the names and ids of the indexes covering the given field.

    A value whose first three characters are digits is treated as a MARC
    tag; logical field names are not implemented yet and yield [].
    """
    if not field[0:3].isdigit():
        # future implementation for logical fields
        return []
    # 'field' is actually a MARC tag
    return get_tag_indexes(field, virtual=False)
# Memoised variant: caches get_field_indexes results per field value.
get_field_indexes_memoised = Memoise(get_field_indexes)
def get_all_synonym_knowledge_bases():
    """Return a dictionary mapping index name to a (knowledge base name,
    match type) tuple for every word index that has synonym knowledge
    base information. Indexes with an empty configuration are skipped;
    the result is empty when no tags are indexed.
    Example: output['global'] = ('INDEX-SYNONYM-TITLE', 'exact'), output['title'] = ('INDEX-SYNONYM-TITLE', 'exact')."""
    out = {}
    for index_name, kb_data in get_all_index_names_and_column_values("synonym_kbrs"):
        # skip indexes whose synonym configuration is an empty string
        if kb_data:
            out[index_name] = tuple(kb_data.split(CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR))
    return out
def get_index_remove_stopwords(index_id):
    """Return the remove_stopwords setting of the given index, or False.

    A tuple result (error marker) and the values 'No' or '' both map to
    False; otherwise the raw value is returned. Just for consistency
    with WordTable.

    @param index_id: id of the index
    """
    value = get_idx_remove_stopwords(index_id)
    if isinstance(value, tuple) or value in ('No', ''):
        return False
    return value
def get_index_remove_html_markup(index_id):
    """Translate the remove_html_markup database value ('Yes'/'No') of
    the given index into a boolean. Just for consistency with WordTable."""
    return get_idx_remove_html_markup(index_id) == 'Yes'
def get_index_remove_latex_markup(index_id):
    """Translate the remove_latex_markup database value ('Yes'/'No') of
    the given index into a boolean. Just for consistency with WordTable."""
    return get_idx_remove_latex_markup(index_id) == 'Yes'
def get_index_tokenizer(index_id):
    """Return the tokenizer class configured for the given index.

    Reads the ``tokenizer`` column of the idxINDEX table and maps its
    value through the _TOKENIZERS registry.

    @param index_id: id of the index
    @return: tokenizer class, or None when the column does not exist or
        the configured tokenizer is unknown
    """
    query = "SELECT tokenizer FROM idxINDEX WHERE id=%s"
    out = None
    try:
        # BUGFIX: pass index_id as a bound query parameter instead of
        # interpolating it into the SQL string (avoids SQL injection and
        # quoting issues).
        res = run_sql(query, (index_id,))
        if res:
            out = _TOKENIZERS[res[0][0]]
    except DatabaseError:
        write_message("Exception caught for SQL statement: %s; column tokenizer might not exist" % query, sys.stderr)
    except KeyError:
        write_message("Exception caught: there is no such tokenizer")
        out = None
    return out
def get_last_updated_all_indexes():
    """Return (name, last_updated) rows for every defined index."""
    return run_sql("""SELECT name, last_updated FROM idxINDEX""")
def split_ranges(parse_string):
    """Parse a comma-separated list of record ranges ("a-b" or single
    "a") into a list of [low, high] integer pairs; reversed bounds are
    swapped so low <= high."""
    rec_ranges = []
    for chunk in parse_string.split(","):
        bounds = chunk.split("-")
        if len(bounds) == 1:
            value = int(bounds[0])
            rec_ranges.append([value, value])
        else:
            low, high = int(bounds[0]), int(bounds[1])
            if low > high:
                # sanity check: put the bounds in ascending order
                low, high = high, low
            rec_ranges.append([low, high])
    return rec_ranges
def get_word_tables(tables):
    """Map a list of index names to tuples (index_id, index_name,
    index_tags); names with no matching index are reported and skipped."""
    word_tables = []
    for index in (tables or []):
        index_id = get_index_id_from_index_name(index)
        if not index_id:
            write_message("Error: There is no %s words table." % index, sys.stderr)
            continue
        word_tables.append((index_id, index, get_index_tags(index)))
    return word_tables
def get_date_range(var):
    "Returns the two dates contained as a low,high tuple"
    limits = var.split(",")
    if len(limits) == 1:
        return get_datetime(limits[0]), None
    if len(limits) == 2:
        return get_datetime(limits[0]), get_datetime(limits[1])
    # anything else is malformed: no range at all
    return None, None
def create_range_list(res):
    """Compress an ascending sequence of recIDs into a list of
    [low, high] ranges; consecutive ids are merged into one range."""
    if not res or not res[0]:
        return []
    range_list = [[res[0], res[0]]]
    for rec_id in res[1:]:
        if rec_id == range_list[-1][1] + 1:
            # extends the current run
            range_list[-1][1] = rec_id
        else:
            range_list.append([rec_id, rec_id])
    return range_list
def beautify_range_list(range_list):
    """Return a non-overlapping, maximal range list.

    Fix: the original single-pass merge could leave adjacent ranges
    unmerged when input order interleaved them (e.g. [[1,2],[5,6],[3,4]]
    produced [[1,4],[5,6]] instead of [[1,6]]).  Sorting first and then
    merging guarantees the result is truly maximal.
    Assumes each input range is [low, high] with low <= high, as
    produced by split_ranges()/create_range_list().
    """
    merged = []
    for rng in sorted([rng[0], rng[1]] for rng in range_list):
        if merged and rng[0] <= merged[-1][1] + 1:
            # overlapping or adjacent: extend the previous range
            merged[-1][1] = max(merged[-1][1], rng[1])
        else:
            merged.append(rng)
    return merged
def truncate_index_table(index_name):
    """Truncate the forward and reverse word/phrase tables of the given
    index and reset its last_updated stamp so a full reindex runs."""
    index_id = get_index_id_from_index_name(index_name)
    if not index_id:
        return
    write_message('Truncating %s index table in order to reindex.' % index_name, verbose=2)
    run_sql("UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00' WHERE id=%s", (index_id,))
    for pattern in ("idxWORD%02dF", "idxWORD%02dR", "idxPHRASE%02dF", "idxPHRASE%02dR"):
        run_sql("TRUNCATE " + pattern % index_id) # kwalitee: disable=sql
def update_index_last_updated(indexes, starting_time=None):
    """Stamp the given indexes' last_updated column with the task's
    starting time, so records modified during the run are picked up by
    the next reindexing pass.
    @param indexes: list of index names
    @param starting_time: timestamp to store; None means do nothing"""
    if starting_time is None:
        return None
    for name in indexes:
        write_message("updating last_updated to %s...for %s index" % (starting_time, name), verbose=9)
        run_sql("UPDATE idxINDEX SET last_updated=%s WHERE name=%s", (starting_time, name,))
def get_percentage_completed(num_done, num_total):
    """Return a display string such as "(42.3%)" for task progress, or
    "" when nothing is done yet.

    Fix: guard against num_total == 0, which previously raised
    ZeroDivisionError; also renamed the misleading local
    (it held the completed percentage, not the remaining one).
    @param num_done: records processed so far
    @param num_total: total number of records
    """
    if not num_total:
        return ""
    percentage_completed = 100.0 * float(num_done) / float(num_total)
    if percentage_completed:
        return "(%.1f%%)" % (percentage_completed,)
    return ""
def _fill_dict_of_indexes_with_empty_sets():
    """find_affected_records internal helper.
    Maps every non-virtual index name to a fresh empty set:
    {'index_name1': set([]), ...}"""
    return dict((name, set()) for name in get_all_indexes(virtual=False))
def find_affected_records_for_index(indexes=[], recIDs=[], force_all_indexes=False):
    """
    Checks which records need to be changed/reindexed for the given
    index(es), using the record revision history kept in the hstRECORD
    table.  If force_all_indexes is set, all recIDs are assigned to all
    indexes.

    Fix: the hstRECORD query now binds its values through run_sql
    instead of interpolating them (with hand-written quoting) into the
    SQL string.

    @param indexes: names of indexes for reindexation
    @param recIDs: recIDs for reindexation in form:
        [[range1_down, range1_up], [range2_down, range2_up], ...]
    @param force_all_indexes: should we index all indexes?
    @return: dict {index_name: sorted list of recIDs to reindex}
    NOTE: the default [] arguments are never mutated here.
    """
    tmp_dates = dict(get_last_updated_all_indexes())
    # indexes that were never run get an "epoch" date so that every
    # recorded revision counts as newer
    modification_dates = dict([(name, tmp_dates[name] or datetime(1000, 1, 1, 1, 1, 1))
                               for name in tmp_dates])
    tmp_all_indexes = get_all_indexes(virtual=False)
    indexes = remove_inexistent_indexes(indexes, leave_virtual=False)
    if not indexes:
        return {}

    def _should_reindex_for_revision(index_name, revision_date):
        # reindex only when the revision postdates the index's last run
        try:
            if modification_dates[index_name] < revision_date and index_name in indexes:
                return True
            return False
        except KeyError:
            return False

    if force_all_indexes:
        records_for_indexes = {}
        all_recIDs = []
        for recIDs_range in recIDs:
            all_recIDs.extend(range(recIDs_range[0], recIDs_range[1] + 1))
        for index in indexes:
            records_for_indexes[index] = all_recIDs
        return records_for_indexes

    min_last_updated = get_min_last_updated(indexes)[0][0] or datetime(1000, 1, 1, 1, 1, 1)
    indexes_to_change = _fill_dict_of_indexes_with_empty_sets()
    recIDs_info = []
    for recIDs_range in recIDs:
        query = """SELECT id_bibrec,job_date,affected_fields FROM hstRECORD WHERE
                   id_bibrec BETWEEN %s AND %s AND job_date > %s"""
        res = run_sql(query, (recIDs_range[0], recIDs_range[1], min_last_updated))
        if res:
            recIDs_info.extend(res)
    for recID_info in recIDs_info:
        recID, revision, affected_fields = recID_info
        affected_fields = affected_fields.split(",")
        indexes_for_recID = set()
        for field in affected_fields:
            if field:
                field_indexes = get_field_indexes_memoised(field) or []
                indexes_names = set([idx[1] for idx in field_indexes])
                indexes_for_recID |= indexes_names
            else:
                # record was inserted: all fields changed, no specific
                # affected fields were recorded
                indexes_for_recID |= set(tmp_all_indexes)
        indexes_for_recID_filtered = [ind for ind in indexes_for_recID
                                      if _should_reindex_for_revision(ind, revision)]
        for index in indexes_for_recID_filtered:
            indexes_to_change[index].add(recID)
    indexes_to_change = dict((k, list(sorted(v)))
                             for k, v in indexes_to_change.iteritems() if v)
    return indexes_to_change
#def update_text_extraction_date(first_recid, last_recid):
#"""for all the bibdoc connected to the specified recid, set
#the text_extraction_date to the task_starting_time."""
#run_sql("UPDATE bibdoc JOIN bibrec_bibdoc ON id=id_bibdoc SET text_extraction_date=%s WHERE id_bibrec BETWEEN %s AND %s", (task_get_task_param('task_starting_time'), first_recid, last_recid))
class WordTable:
"A class to hold the words table."
def __init__(self, index_name, index_id, fields_to_index, table_name_pattern, wordtable_type, tag_to_tokenizer_map, wash_index_terms=50):
    """Creates words table instance.
    @param index_name: the index name
    @param index_id: the index integer identificator
    @param fields_to_index: a list of fields to index
    @param table_name_pattern: i.e. idxWORD%02dF or idxPHRASE%02dF
    @param wordtable_type: type of the wordtable: Words, Pairs, Phrases
    @param tag_to_tokenizer_map: a mapping to specify particular tokenizer to
        extract words from particular metdata (such as 8564_u)
    @param wash_index_terms: do we wash index terms, and if yes (when >0),
        how many characters do we keep in the index terms; see
        max_char_length parameter of wash_index_term()
    """
    self.index_name = index_name
    self.index_id = index_id
    # concrete forward table for this index, e.g. 'idxWORD01F'
    self.tablename = table_name_pattern % index_id
    # pattern without the trailing F/R letter, e.g. 'idxWORD%02d';
    # used later to build table names of virtual indexes
    self.virtual_tablename_pattern = table_name_pattern[table_name_pattern.find('idx'):-1]
    self.humanname = get_def_name('%s' % (str(index_id),), "idxINDEX")[0][1]
    # ranges of recIDs whose terms are currently held in memory
    self.recIDs_in_mem = []
    self.fields_to_index = fields_to_index
    # in-memory word table: {term: {recID: +1/-1}}
    self.value = {}
    try:
        self.stemming_language = get_index_stemming_language(index_id)
    except KeyError:
        self.stemming_language = ''
    self.remove_stopwords = get_index_remove_stopwords(index_id)
    self.remove_html_markup = get_index_remove_html_markup(index_id)
    self.remove_latex_markup = get_index_remove_latex_markup(index_id)
    # instantiate the tokenizer class configured for this index in the DB
    self.tokenizer = get_index_tokenizer(index_id)(self.stemming_language,
                                                   self.remove_stopwords,
                                                   self.remove_html_markup,
                                                   self.remove_latex_markup)
    self.default_tokenizer_function = self.tokenizer.get_tokenizing_function(wordtable_type)
    self.wash_index_terms = wash_index_terms
    self.is_virtual = is_index_virtual(self.index_id)
    self.virtual_indexes = get_index_virtual_indexes(self.index_id)
    # tagToTokenizer mapping. It offers an indirection level necessary for
    # indexing fulltext.
    self.tag_to_words_fnc_map = {}
    for k in tag_to_tokenizer_map.keys():
        special_tokenizer_for_tag = _TOKENIZERS[tag_to_tokenizer_map[k]](self.stemming_language,
                                                                         self.remove_stopwords,
                                                                         self.remove_html_markup,
                                                                         self.remove_latex_markup)
        special_tokenizer_function = special_tokenizer_for_tag.get_tokenizing_function(wordtable_type)
        self.tag_to_words_fnc_map[k] = special_tokenizer_function
    if self.stemming_language and self.tablename.startswith('idxWORD'):
        write_message('%s has stemming enabled, language %s' % (self.tablename, self.stemming_language))
def turn_off_virtual_indexes(self):
    """Temporarily detach all virtual indexes, so subsequent operations
    touch only this index's own tables."""
    self.virtual_indexes = []
def turn_on_virtual_indexes(self):
    """Re-attach the virtual indexes connected to this index."""
    self.virtual_indexes = get_index_virtual_indexes(self.index_id)
def get_field(self, recID, tag):
    """Returns list of values of the MARC-21 'tag' fields for the
    record 'recID'."""
    bibXXx = "bib" + tag[0] + tag[1] + "x"
    bibrec_bibXXx = "bibrec_" + bibXXx
    query = """SELECT value FROM %s AS b, %s AS bb
               WHERE bb.id_bibrec=%%s AND bb.id_bibxxx=b.id
               AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx)
    return [row[0] for row in run_sql(query, (recID, tag))]
def clean(self):
    """Reset the in-memory words table."""
    self.value = {}
def put_into_db(self, mode="normal"):
    """Updates the current words table in the corresponding DB
    idxFOO table. Mode 'normal' means normal execution,
    mode 'emergency' means words index reverting to old state.

    Flush protocol for the reverse table (per record range):
      normal:    CURRENT -> TEMPORARY, write FUTURE rows, then
                 FUTURE -> CURRENT and delete TEMPORARY;
      emergency: TEMPORARY -> CURRENT and delete FUTURE (rollback).
    The same in-memory words are flushed to this index's own tables and
    to every attached virtual index's tables.
    """
    write_message("%s %s wordtable flush started" % (self.tablename, mode))
    write_message('...updating %d words into %s started' % \
        (len(self.value), self.tablename))
    task_update_progress("(%s:%s) flushed %d/%d words" % (self.tablename, self.humanname, 0, len(self.value)))
    # normalise the pending record ranges before using them in queries
    self.recIDs_in_mem = beautify_range_list(self.recIDs_in_mem)
    # own index first, then all virtual indexes that aggregate it
    all_indexes = [(self.index_id, self.humanname)]
    if self.virtual_indexes:
        all_indexes.extend(self.virtual_indexes)
    for ind_id, ind_name in all_indexes:
        # reverse table of the index being flushed ('...R')
        tab_name = self.tablename[:-1] + "R"
        if ind_id != self.index_id:
            tab_name = self.virtual_tablename_pattern % ind_id + "R"
        if mode == "normal":
            # step 1: park the old CURRENT rows as TEMPORARY
            for group in self.recIDs_in_mem:
                query = """UPDATE %s SET type='TEMPORARY' WHERE id_bibrec
                BETWEEN %%s AND %%s AND type='CURRENT'""" % tab_name
                write_message(query % (group[0], group[1]), verbose=9)
                run_sql(query, (group[0], group[1]))
        # step 2: flush every in-memory word into the forward table
        nb_words_total = len(self.value)
        nb_words_report = int(nb_words_total / 10.0)
        nb_words_done = 0
        for word in self.value.keys():
            self.put_word_into_db(word, ind_id)
            nb_words_done += 1
            # progress report roughly every 10% of the words
            if nb_words_report != 0 and ((nb_words_done % nb_words_report) == 0):
                write_message('......processed %d/%d words' % (nb_words_done, nb_words_total))
                percentage_display = get_percentage_completed(nb_words_done, nb_words_total)
                task_update_progress("(%s:%s) flushed %d/%d words %s" % (tab_name, ind_name, nb_words_done, nb_words_total, percentage_display))
        write_message('...updating %d words into %s ended' % \
            (nb_words_total, tab_name))
        write_message('...updating reverse table %s started' % tab_name)
        if mode == "normal":
            # step 3: promote FUTURE rows and drop the parked TEMPORARY ones
            for group in self.recIDs_in_mem:
                query = """UPDATE %s SET type='CURRENT' WHERE id_bibrec
                BETWEEN %%s AND %%s AND type='FUTURE'""" % tab_name
                write_message(query % (group[0], group[1]), verbose=9)
                run_sql(query, (group[0], group[1]))
                query = """DELETE FROM %s WHERE id_bibrec
                BETWEEN %%s AND %%s AND type='TEMPORARY'""" % tab_name
                write_message(query % (group[0], group[1]), verbose=9)
                run_sql(query, (group[0], group[1]))
                #if self.is_fulltext_index:
                #update_text_extraction_date(group[0], group[1])
            write_message('End of updating wordTable into %s' % tab_name, verbose=9)
        elif mode == "emergency":
            # rollback: restore TEMPORARY rows and discard FUTURE ones
            for group in self.recIDs_in_mem:
                query = """UPDATE %s SET type='CURRENT' WHERE id_bibrec
                BETWEEN %%s AND %%s AND type='TEMPORARY'""" % tab_name
                write_message(query % (group[0], group[1]), verbose=9)
                run_sql(query, (group[0], group[1]))
                query = """DELETE FROM %s WHERE id_bibrec
                BETWEEN %%s AND %%s AND type='FUTURE'""" % tab_name
                write_message(query % (group[0], group[1]), verbose=9)
                run_sql(query, (group[0], group[1]))
            write_message('End of emergency flushing wordTable into %s' % tab_name, verbose=9)
        write_message('...updating reverse table %s ended' % tab_name)
    # all indexes flushed: drop in-memory state
    self.clean()
    self.recIDs_in_mem = []
    write_message("%s %s wordtable flush ended" % (self.tablename, mode))
    task_update_progress("(%s:%s) flush ended" % (self.tablename, self.humanname))
def load_old_recIDs(self, word, index_id=None):
    """Load the existing hitlist for ``word`` from the database index
    files; returns an intbitset, or None when the term is not yet in
    the (possibly virtual) forward table."""
    if index_id != self.index_id:
        # a virtual index's forward table, e.g. 'idxWORD05F'
        table = self.virtual_tablename_pattern % index_id + "F"
    else:
        table = self.tablename
    rows = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % table, (word,))
    if rows:
        return intbitset(rows[0][0])
    return None
def merge_with_old_recIDs(self, word, set):
    """Merge the system numbers stored in memory (hash of recIDs with
    value +1 or -1 according to whether to add/delete them) with those
    stored in the database index and received in ``set``.
    Return True when ``set`` was changed, False when it is unchanged."""
    before = intbitset(set)
    set.update_with_signs(self.value[word])
    return set != before
def put_word_into_db(self, word, index_id):
    """Flush a single word to the database and delete it from memory"""
    # pick the forward table: this index's own, or a virtual index's
    tab_name = self.tablename
    if index_id != self.index_id:
        tab_name = self.virtual_tablename_pattern % index_id + "F"
    set = self.load_old_recIDs(word, index_id)
    if set is not None: # merge the word recIDs found in memory:
        if not self.merge_with_old_recIDs(word, set):
            # nothing to update:
            write_message("......... unchanged hitlist for ``%s''" % word, verbose=9)
            pass
        else:
            # yes there were some new words:
            write_message("......... updating hitlist for ``%s''" % word, verbose=9)
            run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % wash_table_column_name(tab_name), (set.fastdump(), word)) # kwalitee: disable=sql
    else: # the word is new, will create new set:
        write_message("......... inserting hitlist for ``%s''" % word, verbose=9)
        # only the positively-signed recIDs form the initial hitlist
        set = intbitset(self.value[word].keys())
        try:
            run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % wash_table_column_name(tab_name), (word, set.fastdump())) # kwalitee: disable=sql
        except Exception, e:
            ## We send this exception to the admin only when is not
            ## already reparing the problem.
            register_exception(prefix="Error when putting the term '%s' into db (hitlist=%s): %s\n" % (repr(word), set, e), alert_admin=(task_get_option('cmd') != 'repair'))
    if not set: # never store empty words
        run_sql("DELETE FROM %s WHERE term=%%s" % wash_table_column_name(tab_name), (word,)) # kwalitee: disable=sql
def display(self):
    """Display the in-memory word table, one term per line, sorted."""
    for term in sorted(self.value.keys()):
        write_message("%s: %s" % (term, self.value[term]))
def count(self):
    """Return the number of words held in the in-memory table."""
    return len(self.value)
def info(self):
    """Print a short summary of the in-memory words table."""
    write_message("The words table contains %d words." % self.count())
def lookup_words(self, word=""):
    """Look up a word in the in-memory words table; when no word is
    given, prompt for one interactively."""
    if not word:
        while True:
            try:
                word = raw_input("Enter word: ")
                break
            except (EOFError, KeyboardInterrupt):
                # user aborted the prompt: nothing to look up
                return
    if word in self.value:
        write_message("The word '%s' is found %d times." \
            % (word, len(self.value[word])))
    else:
        write_message("The word '%s' does not exist in the word file."\
            % word)
def add_recIDs(self, recIDs, opt_flush):
    """Fetches records which id in the recIDs range list and adds
    them to the wordTable. The recIDs range list is of the form:
    [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].
    @param opt_flush: flush the in-memory table to the DB after at most
        this many records
    """
    if self.is_virtual:
        # virtual indexes are filled indirectly, via their dependent indexes
        return
    global chunksize, _last_word_table
    flush_count = 0
    records_done = 0
    records_to_go = 0
    for arange in recIDs:
        records_to_go = records_to_go + arange[1] - arange[0] + 1
    time_started = time.time() # will measure profile time
    for arange in recIDs:
        i_low = arange[0]
        chunksize_count = 0
        while i_low <= arange[1]:
            task_sleep_now_if_required()
            # calculate chunk group of recIDs and treat it:
            # the chunk is bounded both by the flush threshold and by
            # the global chunksize
            i_high = min(i_low + opt_flush - flush_count - 1, arange[1])
            i_high = min(i_low + chunksize - chunksize_count - 1, i_high)
            try:
                self.chk_recID_range(i_low, i_high)
            except StandardError:
                # commit pending Solr documents before propagating the error
                if self.index_name == 'fulltext' and CFG_SOLR_URL:
                    solr_commit()
                raise
            write_message(CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR % \
                (self.tablename, i_low, i_high))
            if CFG_CHECK_MYSQL_THREADS:
                kill_sleepy_mysql_threads()
            percentage_display = get_percentage_completed(records_done, records_to_go)
            task_update_progress("(%s:%s) adding recs %d-%d %s" % (self.tablename, self.humanname, i_low, i_high, percentage_display))
            # old terms get sign -1, new terms sign +1; the flush
            # resolves both against the stored hitlists
            self.del_recID_range(i_low, i_high)
            just_processed = self.add_recID_range(i_low, i_high)
            flush_count = flush_count + i_high - i_low + 1
            chunksize_count = chunksize_count + i_high - i_low + 1
            records_done = records_done + just_processed
            write_message(CFG_BIBINDEX_ADDING_RECORDS_STARTED_STR % \
                (self.tablename, i_low, i_high))
            if chunksize_count >= chunksize:
                chunksize_count = 0
            # flush if necessary:
            if flush_count >= opt_flush:
                self.put_into_db()
                self.clean()
                if self.index_name == 'fulltext' and CFG_SOLR_URL:
                    solr_commit()
                write_message("%s backing up" % (self.tablename))
                flush_count = 0
                self.log_progress(time_started, records_done, records_to_go)
            # iterate:
            i_low = i_high + 1
    # final flush of whatever is left in memory
    if flush_count > 0:
        self.put_into_db()
        if self.index_name == 'fulltext' and CFG_SOLR_URL:
            solr_commit()
    self.log_progress(time_started, records_done, records_to_go)
def add_recID_range(self, recID1, recID2):
    """Add records from RECID1 to RECID2.
    Builds a per-record term list (wlist), augments it with synonyms,
    clears it for DELETED records, propagates it to virtual indexes,
    writes it to the reverse table with FUTURE status and finally adds
    every term to the in-memory forward table with sign +1.
    @return: number of records for which terms were collected
    """
    wlist = {}
    self.recIDs_in_mem.append([recID1, recID2])
    # special case of author indexes where we also add author
    # canonical IDs:
    if self.index_name in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'):
        for recID in range(recID1, recID2 + 1):
            if not wlist.has_key(recID):
                wlist[recID] = []
            wlist[recID] = list_union(get_author_canonical_ids_for_recid(recID),
                                      wlist[recID])
    if len(self.fields_to_index) == 0:
        #'no tag' style of indexing - use bibfield instead of directly consulting bibrec
        tokenizing_function = self.default_tokenizer_function
        for recID in range(recID1, recID2 + 1):
            record = get_record(recID)
            if record:
                new_words = tokenizing_function(record)
                if not wlist.has_key(recID):
                    wlist[recID] = []
                wlist[recID] = list_union(new_words, wlist[recID])
    # case of special indexes:
    elif self.index_name in ('authorcount', 'journal'):
        # these tokenizers work on the recID itself, not on field values
        for tag in self.fields_to_index:
            tokenizing_function = self.tag_to_words_fnc_map.get(tag, self.default_tokenizer_function)
            for recID in range(recID1, recID2 + 1):
                new_words = tokenizing_function(recID)
                if not wlist.has_key(recID):
                    wlist[recID] = []
                wlist[recID] = list_union(new_words, wlist[recID])
    # usual tag-by-tag indexing for the rest:
    else:
        for tag in self.fields_to_index:
            tokenizing_function = self.tag_to_words_fnc_map.get(tag, self.default_tokenizer_function)
            phrases = self.get_phrases_for_tokenizing(tag, recID1, recID2)
            for row in sorted(phrases):
                recID, phrase = row
                if not wlist.has_key(recID):
                    wlist[recID] = []
                new_words = tokenizing_function(phrase)
                wlist[recID] = list_union(new_words, wlist[recID])
    # lookup index-time synonyms:
    synonym_kbrs = get_all_synonym_knowledge_bases()
    if synonym_kbrs.has_key(self.index_name):
        if len(wlist) == 0: return 0
        recIDs = wlist.keys()
        for recID in recIDs:
            for word in wlist[recID]:
                word_synonyms = get_synonym_terms(word,
                                                  synonym_kbrs[self.index_name][0],
                                                  synonym_kbrs[self.index_name][1],
                                                  use_memoise=True)
                if word_synonyms:
                    wlist[recID] = list_union(word_synonyms, wlist[recID])
    # were there some words for these recIDs found?
    recIDs = wlist.keys()
    for recID in recIDs:
        # was this record marked as deleted?
        if "DELETED" in self.get_field(recID, "980__c"):
            wlist[recID] = []
            write_message("... record %d was declared deleted, removing its word list" % recID, verbose=9)
        write_message("... record %d, termlist: %s" % (recID, wlist[recID]), verbose=9)
    self.index_virtual_indexes_reversed(wlist, recID1, recID2)
    if len(wlist) == 0: return 0
    # put words into reverse index table with FUTURE status:
    for recID in recIDs:
        run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % wash_table_column_name(self.tablename[:-1]), (recID, serialize_via_marshal(wlist[recID]))) # kwalitee: disable=sql
        # ... and, for new records, enter the CURRENT status as empty:
        try:
            run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % wash_table_column_name(self.tablename[:-1]), (recID, serialize_via_marshal([]))) # kwalitee: disable=sql
        except DatabaseError:
            # okay, it's an already existing record, no problem
            pass
    # put words into memory word list:
    put = self.put
    for recID in recIDs:
        for w in wlist[recID]:
            put(recID, w, 1)
    return len(recIDs)
def get_phrases_for_tokenizing(self, tag, first_recID, last_recID):
    """Gets phrases for later tokenization for a range of records and
    specific tag.
    @param tag: MARC tag
    @param first_recID: first recID from the range of recIDs to index
    @param last_recID: last recID from the range of recIDs to index
    @return: iterable of (recID, phrase) pairs; a tuple of DB rows, or a
        set when URL/authority phrases were added
    """
    bibXXx = "bib" + tag[0] + tag[1] + "x"
    bibrec_bibXXx = "bibrec_" + bibXXx
    query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb
               WHERE bb.id_bibrec BETWEEN %%s AND %%s
               AND bb.id_bibxxx=b.id AND tag LIKE %%s""" % (bibXXx, bibrec_bibXXx)
    phrases = run_sql(query, (first_recID, last_recID, tag))
    if tag == '8564_u':
        ## FIXME: Quick hack to be sure that hidden files are
        ## actually indexed.
        phrases = set(phrases)
        for recid in xrange(int(first_recID), int(last_recID) + 1):
            for bibdocfile in BibRecDocs(recid).list_latest_files():
                phrases.add((recid, bibdocfile.get_url()))
    #authority records
    pattern = tag.replace('%', '*')
    matches = fnmatch.filter(CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.keys(), pattern)
    if not len(matches):
        return phrases
    phrases = set(phrases)
    for tag_match in matches:
        # the $0 subfield of the matched tag links to authority records
        authority_tag = tag_match[0:3] + "__0"
        for recID in xrange(int(first_recID), int(last_recID) + 1):
            control_nos = get_fieldvalues(recID, authority_tag)
            for control_no in control_nos:
                # index the authority record's strings under this record
                new_strings = get_index_strings_by_control_no(control_no)
                for string_value in new_strings:
                    phrases.add((recID, string_value))
    return phrases
def index_virtual_indexes_reversed(self, wlist, recID1, recID2):
    """Inserts indexed words into all virtual indexes connected to
    this index.
    For every virtual index, the new FUTURE termlist of a record is:
    (old virtual terms - old terms contributed by this index) | new terms.
    @param wlist: {recID: list of terms} freshly computed for this index
    @param recID1: low end of the processed recID range
    @param recID2: high end of the processed recID range
    """
    #first: need to take old values from given index to remove
    #them from virtual indexes
    query = """SELECT id_bibrec, termlist FROM %sR WHERE id_bibrec
               BETWEEN %%s AND %%s""" % wash_table_column_name(self.tablename[:-1])
    old_index_values = run_sql(query, (recID1, recID2))
    if old_index_values:
        # transpose rows into {recID: termlist}
        zipped = zip(*old_index_values)
        old_index_values = dict(zip(zipped[0], map(deserialize_via_marshal, zipped[1])))
    else:
        old_index_values = dict()
    recIDs = wlist.keys()
    for vindex_id, vindex_name in self.virtual_indexes:
        #second: need to take old values from virtual index
        #to have a list of words from which we can remove old values from given index
        tab_name = self.virtual_tablename_pattern % vindex_id + "R"
        query = """SELECT id_bibrec, termlist FROM %s WHERE type='CURRENT' AND id_bibrec
                   BETWEEN %%s AND %%s""" % tab_name
        old_virtual_index_values = run_sql(query, (recID1, recID2))
        if old_virtual_index_values:
            zipped = zip(*old_virtual_index_values)
            old_virtual_index_values = dict(zip(zipped[0], map(deserialize_via_marshal, zipped[1])))
        else:
            old_virtual_index_values = dict()
        for recID in recIDs:
            # keep virtual terms from other indexes, drop this index's old
            # contribution, add its new terms
            to_serialize = list((set(old_virtual_index_values.get(recID) or []) - set(old_index_values.get(recID) or [])) | set(wlist[recID]))
            run_sql("INSERT INTO %s (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % wash_table_column_name(tab_name), (recID, serialize_via_marshal(to_serialize))) # kwalitee: disable=sql
            try:
                run_sql("INSERT INTO %s (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % wash_table_column_name(tab_name), (recID, serialize_via_marshal([]))) # kwalitee: disable=sql
            except DatabaseError:
                # record already has a CURRENT row: fine
                pass
        if len(recIDs) != (recID2 - recID1 + 1):
            #for records in range(recID1, recID2) which weren't updated:
            #need to prevent them from being deleted by function: 'put_into_db'
            #which deletes all records with 'CURRENT' status
            query = """INSERT INTO %s (id_bibrec, termlist, type)
                       SELECT id_bibrec, termlist, 'FUTURE' FROM %s
                       WHERE id_bibrec BETWEEN %%s AND %%s
                       AND type='CURRENT'
                       AND id_bibrec IN (
                       SELECT id_bibrec FROM %s
                       WHERE id_bibrec BETWEEN %%s AND %%s
                       GROUP BY id_bibrec HAVING COUNT(id_bibrec) = 1
                       )
                       """ % ((wash_table_column_name(tab_name),)*3)
            run_sql(query, (recID1, recID2, recID1, recID2))
def log_progress(self, start, done, todo):
    """Calculate progress and store it.
    start: start time,
    done: records processed,
    todo: total number of records"""
    elapsed = time.time() - start
    # consistency check: skip nonsensical timings or counts
    if elapsed == 0 or done > todo:
        return
    recs_per_min = done / (elapsed / 60.0)
    write_message("%d records took %.1f seconds to complete.(%1.f recs/min)"\
        % (done, elapsed, recs_per_min))
    if recs_per_min:
        write_message("Estimated runtime: %.1f minutes" % \
            ((todo - done) / recs_per_min))
def put(self, recID, word, sign):
    """Adds/deletes a word to the in-memory word list: sign=+1 means
    add, sign=-1 means delete at next flush.

    Fix: narrowed the bare ``except:`` to ``except Exception`` so that
    KeyboardInterrupt/SystemExit still propagate; any other failure is
    reported and swallowed so one bad term does not abort the run.
    """
    try:
        if self.wash_index_terms:
            word = wash_index_term(word, self.wash_index_terms)
        if word in self.value:
            # the word 'word' exist already: update sign
            self.value[word][recID] = sign
        else:
            self.value[word] = {recID: sign}
    except Exception:
        write_message("Error: Cannot put word %s with sign %d for recID %s." % (word, sign, recID))
def del_recIDs(self, recIDs):
    """Fetches records which id in the recIDs range list and marks
    their terms for deletion, then flushes. The recIDs range list is of
    the form: [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].

    Fix: removed the dead local accumulator ``count`` — it was computed
    with an off-by-one (missing +1 per range) and never read.
    """
    for arange in recIDs:
        task_sleep_now_if_required()
        self.del_recID_range(arange[0], arange[1])
    self.put_into_db()
    # keep the external Solr fulltext store in sync
    if self.index_name == 'fulltext' and CFG_SOLR_URL:
        solr_commit()
def del_recID_range(self, low, high):
    """Load the stored term lists of records low..high and mark every
    term with sign -1 in memory (deleted at the next flush)."""
    write_message("%s fetching existing words for records #%d-#%d started" % \
        (self.tablename, low, high), verbose=3)
    self.recIDs_in_mem.append([low, high])
    query = """SELECT id_bibrec,termlist FROM %sR as bb WHERE bb.id_bibrec
               BETWEEN %%s AND %%s""" % (self.tablename[:-1])
    for recID, termlist in run_sql(query, (low, high)):
        for term in deserialize_via_marshal(termlist):
            self.put(recID, term, -1)
    write_message("%s fetching existing words for records #%d-#%d ended" % \
        (self.tablename, low, high), verbose=3)
def report_on_table_consistency(self):
    """Check reverse words index tables (e.g. idxWORD01R) for
    interesting states such as 'TEMPORARY' state.
    Prints small report (no of words, no of bad words).
    @return: number of inconsistent (non-CURRENT) index records"""
    # find number of words:
    res = run_sql("""SELECT COUNT(*) FROM %s""" % (self.tablename), None, 1)
    nb_words = res[0][0] if res else 0
    # find number of records:
    res = run_sql("""SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1]), None, 1)
    nb_records = res[0][0] if res else 0
    # report stats:
    write_message("%s contains %d words from %d records" % (self.tablename, nb_words, nb_records))
    # find possible bad states in reverse tables:
    res = run_sql("""SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1]))
    # a failed query is treated as "everything is bad"
    nb_bad_records = res[0][0] if res else 999999999
    if nb_bad_records:
        write_message("EMERGENCY: %s needs to repair %d of %d index records" % \
            (self.tablename, nb_bad_records, nb_records))
    else:
        write_message("%s is in consistent state" % (self.tablename))
    return nb_bad_records
def repair(self, opt_flush):
    """Repair the whole table: find records whose reverse-table rows
    are not in CURRENT state and fix them chunk by chunk, flushing with
    put_into_db("emergency") to revert to the last consistent state.
    @param opt_flush: flush after at most this many records
    """
    # find possible bad states in reverse tables:
    query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1])
    res = run_sql(query, None, 1)
    if res:
        nb_bad_records = res[0][0]
    else:
        nb_bad_records = 0
    if nb_bad_records == 0:
        # nothing to repair
        return
    query = """SELECT id_bibrec FROM %sR WHERE type <> 'CURRENT'""" \
            % (self.tablename[:-1])
    # intbitset deduplicates and sorts the affected recIDs
    res = intbitset(run_sql(query))
    recIDs = create_range_list(list(res))
    flush_count = 0
    records_done = 0
    records_to_go = 0
    for arange in recIDs:
        records_to_go = records_to_go + arange[1] - arange[0] + 1
    time_started = time.time() # will measure profile time
    for arange in recIDs:
        i_low = arange[0]
        chunksize_count = 0
        while i_low <= arange[1]:
            task_sleep_now_if_required()
            # calculate chunk group of recIDs and treat it:
            # chunk bounded by both the flush threshold and global chunksize
            i_high = min(i_low + opt_flush - flush_count - 1, arange[1])
            i_high = min(i_low + chunksize - chunksize_count - 1, i_high)
            self.fix_recID_range(i_low, i_high)
            flush_count = flush_count + i_high - i_low + 1
            chunksize_count = chunksize_count + i_high - i_low + 1
            records_done = records_done + i_high - i_low + 1
            if chunksize_count >= chunksize:
                chunksize_count = 0
            # flush if necessary:
            if flush_count >= opt_flush:
                self.put_into_db("emergency")
                self.clean()
                flush_count = 0
                self.log_progress(time_started, records_done, records_to_go)
            # iterate:
            i_low = i_high + 1
    # final emergency flush of anything left in memory
    if flush_count > 0:
        self.put_into_db("emergency")
    self.log_progress(time_started, records_done, records_to_go)
    write_message("%s inconsistencies repaired." % self.tablename)
def chk_recID_range(self, low, high):
    """Check that the reverse index table holds only CURRENT rows for
    the recID range low..high; raise StandardError otherwise."""
    ## check db
    count_query = """SELECT COUNT(*) FROM %sR WHERE type <> 'CURRENT'
    AND id_bibrec BETWEEN %%s AND %%s""" % self.tablename[:-1]
    res = run_sql(count_query, (low, high), 1)
    if res[0][0] == 0:
        # okay, words table is consistent
        write_message("%s for %d-%d is in consistent state" % (self.tablename, low, high))
        return
    ## inconsistency detected!
    write_message("EMERGENCY: %s inconsistencies detected..." % self.tablename)
    error_message = "Errors found. You should check consistency of the " \
        "%s - %sR tables.\nRunning 'bibindex --repair' is " \
        "recommended." % (self.tablename, self.tablename[:-1])
    write_message("EMERGENCY: " + error_message, stream=sys.stderr)
    raise StandardError(error_message)
def fix_recID_range(self, low, high):
    """Try to fix reverse index database consistency (e.g. table idxWORD01R) in the low,high doc-id range.
    Possible states for a recID follow:
    CUR TMP FUT: very bad things have happened: warn!
    CUR TMP : very bad things have happened: warn!
    CUR FUT: delete FUT (crash before flushing)
    CUR : database is ok
    TMP FUT: add TMP to memory and del FUT from memory
    flush (revert to old state)
    TMP : very bad things have happened: warn!
    FUT: very bad things have happended: warn!
    """
    # map each recID in the range to the list of row types it has
    state = {}
    query = "SELECT id_bibrec,type FROM %sR WHERE id_bibrec BETWEEN %%s AND %%s"\
            % self.tablename[:-1]
    res = run_sql(query, (low, high))
    for row in res:
        if not state.has_key(row[0]):
            state[row[0]] = []
        state[row[0]].append(row[1])
    ok = 1 # will hold info on whether we will be able to repair
    for recID in state.keys():
        if not 'TEMPORARY' in state[recID]:
            if 'FUTURE' in state[recID]:
                if 'CURRENT' not in state[recID]:
                    # FUT only: unrepairable
                    write_message("EMERGENCY: Index record %d is in inconsistent state. Can't repair it." % recID)
                    ok = 0
                else:
                    # CUR FUT: crash happened before flushing; drop both,
                    # the record will be reindexed
                    write_message("EMERGENCY: Inconsistency in index record %d detected" % recID)
                    query = """DELETE FROM %sR
                    WHERE id_bibrec=%%s""" % self.tablename[:-1]
                    run_sql(query, (recID,))
                    write_message("EMERGENCY: Inconsistency in record %d repaired." % recID)
        else:
            if 'FUTURE' in state[recID] and not 'CURRENT' in state[recID]:
                # TMP FUT: revert to the old state by replaying the
                # row term lists into memory with opposite signs
                self.recIDs_in_mem.append([recID, recID])
                # Get the words file
                query = """SELECT type,termlist FROM %sR
                WHERE id_bibrec=%%s""" % self.tablename[:-1]
                write_message(query, verbose=9)
                res = run_sql(query, (recID,))
                for row in res:
                    wlist = deserialize_via_marshal(row[1])
                    write_message("Words are %s " % wlist, verbose=9)
                    if row[0] == 'TEMPORARY':
                        sign = 1
                    else:
                        sign = -1
                    for word in wlist:
                        self.put(recID, word, sign)
            else:
                # TMP with CUR (and possibly FUT): unrepairable
                write_message("EMERGENCY: %s for %d is in inconsistent "
                    "state. Couldn't repair it." % (self.tablename,
                        recID), stream=sys.stderr)
                ok = 0
    if not ok:
        error_message = "Unrepairable errors found. You should check " \
            "consistency of the %s - %sR tables. Deleting affected " \
            "TEMPORARY and FUTURE entries from these tables is " \
            "recommended; see the BibIndex Admin Guide." % \
            (self.tablename, self.tablename[:-1])
        write_message("EMERGENCY: " + error_message, stream=sys.stderr)
        raise StandardError(error_message)
def remove_dependent_index(self, id_dependent):
    """Removes terms found in dependent index from virtual index.
    Function finds words for removal and then removes them from
    forward and reversed tables term by term.
    @param id_dependent: id of an index which we want to remove from this
                         virtual index
    """
    if not self.is_virtual:
        write_message("Index is not virtual...")
        return

    global chunksize
    terms_current_counter = 0
    terms_done = 0
    terms_to_go = 0

    for_full_removal, for_partial_removal = self.get_words_to_remove(id_dependent, misc_lookup=False)
    # terms shared by this virtual index and the dependent index,
    # together with the dependent index's hitlists
    query = """SELECT t.term, m.hitlist FROM %s%02dF as t INNER JOIN %s%02dF as m
               ON t.term=m.term""" % (self.tablename[:-3], self.index_id, self.tablename[:-3], id_dependent)
    terms_and_hitlists = dict(run_sql(query))
    terms_to_go = len(for_full_removal) + len(for_partial_removal)
    task_sleep_now_if_required()
    #full removal: term exists only via the dependent index
    for term in for_full_removal:
        terms_current_counter += 1
        hitlist = intbitset(terms_and_hitlists[term])
        for recID in hitlist:
            self.remove_single_word_reversed_table(term, recID)
        self.remove_single_word_forward_table(term)
        if terms_current_counter % chunksize == 0:
            terms_done += terms_current_counter
            terms_current_counter = 0
            write_message("removed %s/%s terms..." % (terms_done, terms_to_go))
            task_sleep_now_if_required()
    terms_done += terms_current_counter
    terms_current_counter = 0
    #partial removal: term also contributed by other dependent indexes,
    #so only the records unique to id_dependent are removed
    for term, indexes in for_partial_removal.iteritems():
        self.value = {}
        terms_current_counter += 1
        hitlist = intbitset(terms_and_hitlists[term])
        if len(indexes) > 0:
            # keep records that other dependent indexes still justify
            hitlist -= self._find_common_hitlist(term, id_dependent, indexes)
        for recID in hitlist:
            self.remove_single_word_reversed_table(term, recID)
            # mark the record for removal from the forward hitlist
            # (use 'in' instead of the deprecated dict.has_key())
            if term in self.value:
                self.value[term][recID] = -1
            else:
                self.value[term] = {recID: -1}
        if self.value:
            self.put_word_into_db(term, self.index_id)
        if terms_current_counter % chunksize == 0:
            terms_done += terms_current_counter
            terms_current_counter = 0
            write_message("removed %s/%s terms..." % (terms_done, terms_to_go))
            task_sleep_now_if_required()
def remove_single_word_forward_table(self, word):
    """Immediately and irreversibly removes a word from forward table"""
    query = """DELETE FROM %s WHERE term=%%s""" % self.tablename # kwalitee: disable=sql
    run_sql(query, (word, ))
def remove_single_word_reversed_table(self, word, recID):
    """Removes single word from termlist for given recID.

    Only the 'CURRENT' row of the reversed table is touched, and the
    UPDATE is now skipped when the word is not present (previously an
    unchanged termlist was re-serialized and re-written).
    NOTE(review): when removing the last word the row is left with its
    old termlist (an empty list is never written) -- confirm intended.
    """
    old_set = run_sql("""SELECT termlist FROM %sR WHERE id_bibrec=%%s""" % \
            wash_table_column_name(self.tablename[:-1]), (recID, ))
    if not old_set:
        return
    new_set = deserialize_via_marshal(old_set[0][0])
    if word not in new_set:
        # nothing to remove -- avoid a useless UPDATE round-trip
        return
    new_set.remove(word)
    if new_set:
        run_sql("""UPDATE %sR SET termlist=%%s
                   WHERE id_bibrec=%%s AND
                   type='CURRENT'""" % \
                wash_table_column_name(self.tablename[:-1]), (serialize_via_marshal(new_set), recID))
def _find_common_hitlist(self, term, id_dependent, indexes):
    """Checks 'indexes' for records that have 'term' indexed
       and returns intersection between found records
       and records that have a 'term' inside index
       defined by id_dependent parameter"""
    # SECURITY FIX: the term used to be quoted into the SQL string
    # directly ("t.term='%s'"); bind it as a parameter instead so
    # quotes/backslashes in terms cannot break or inject into the query
    query = """SELECT m.hitlist FROM idxWORD%02dF as t INNER JOIN idxWORD%02dF as m
               ON t.term=m.term WHERE t.term=%%s"""
    common_hitlist = intbitset([])
    for _id in indexes:
        res = run_sql(query % (id_dependent, _id), (term, )) # kwalitee: disable=sql
        if res:
            common_hitlist |= intbitset(res[0][0])
    return common_hitlist
def get_words_to_remove(self, id_dependent, misc_lookup=False):
    """Finds words in dependent index which should be removed from virtual index.
    Example:
    Virtual index 'A' consists of 'B' and 'C' dependent indexes and we want to
    remove 'B' from virtual index 'A'.
    First we need to check if 'B' and 'C' have common words. If they have
    we need to be careful not to remove common words from 'A', because we want
    to remove only words from 'B'.
    Then we need to check common words for 'A' and 'B'. These are potential words
    for removal. We need to subtract common words for 'B' and 'C' from common words
    for 'A' and 'B' to be sure that correct words are removed.
    @return: (list, dict), list contains terms/words for full removal, dict
             contains words for partial removal together with ids of indexes in
             which given term/word also exists
    """
    query = """SELECT t.term FROM %s%02dF as t INNER JOIN %s%02dF as m
               ON t.term=m.term"""
    dependent_indexes = get_virtual_index_building_blocks(self.index_id)
    other_ids = list(dependent_indexes and zip(*dependent_indexes)[0] or [])
    if id_dependent in other_ids:
        other_ids.remove(id_dependent)
    if not misc_lookup:
        # the 'miscellaneous' index is excluded from the lookup by default
        misc_id = get_index_id_from_index_name('miscellaneous')
        if misc_id in other_ids:
            other_ids.remove(misc_id)

    #intersections between dependent indexes
    left_in_other_indexes = {}
    for _id in other_ids:
        intersection = zip(*run_sql(query % (self.tablename[:-3], id_dependent, self.tablename[:-3], _id))) # kwalitee: disable=sql
        terms = bool(intersection) and intersection[0] or []
        for term in terms:
            # setdefault replaces the deprecated dict.has_key() dance
            left_in_other_indexes.setdefault(term, []).append(_id)

    #intersection between virtual index and index we want to remove
    main_intersection = zip(*run_sql(query % (self.tablename[:-3], self.index_id, self.tablename[:-3], id_dependent))) # kwalitee: disable=sql
    terms_main = set(bool(main_intersection) and main_intersection[0] or [])
    return list(terms_main - set(left_in_other_indexes.keys())), left_in_other_indexes
def main():
    """Construct the BibIndex task: register CLI options, usage text and
    the BibSched callbacks, then hand control to the task framework."""
    task_init(authorization_action='runbibindex',
              authorization_msg="BibIndex Task Submission",
              description="""Examples:
\t%s -a -i 234-250,293,300-500 -u admin@localhost
\t%s -a -w author,fulltext -M 8192 -v3
\t%s -d -m +4d -A on --flush=10000\n""" % ((sys.argv[0],) * 3), help_specific_usage=""" Indexing options:
 -a, --add\t\tadd or update words for selected records
 -d, --del\t\tdelete words for selected records
 -i, --id=low[-high]\t\tselect according to doc recID
 -m, --modified=from[,to]\tselect according to modification date
 -c, --collection=c1[,c2]\tselect according to collection
 -R, --reindex\treindex the selected indexes from scratch
 Repairing options:
 -k, --check\t\tcheck consistency for all records in the table(s)
 -r, --repair\t\ttry to repair all records in the table(s)
 Specific options:
 -w, --windex=w1[,w2]\tword/phrase indexes to consider (all)
 -M, --maxmem=XXX\tmaximum memory usage in kB (no limit)
 -f, --flush=NNN\t\tfull consistent table flush after NNN records (10000)
 --force\tforce indexing of all records for provided indexes
 -Z, --remove-dependent-index=w\tname of an index for removing from virtual index
""",
              version=__revision__,
              # getopt-style short option string plus the long equivalents
              specific_params=("adi:m:c:w:krRM:f:oZ:", [
                  "add",
                  "del",
                  "id=",
                  "modified=",
                  "collection=",
                  "windex=",
                  "check",
                  "repair",
                  "reindex",
                  "maxmem=",
                  "flush=",
                  "force",
                  "remove-dependent-index="
              ]),
              # callbacks wired into the BibSched life cycle
              task_stop_helper_fnc=task_stop_table_close_fnc,
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core,
              task_submit_check_options_fnc=task_submit_check_options)
def task_submit_check_options():
    """Check for options compatibility."""
    if task_get_option("reindex"):
        # --reindex is only meaningful for a plain "add" run over the
        # whole record set; explicit ids/collections are incompatible
        not_plain_add = task_get_option("cmd") != "add"
        if not_plain_add or task_get_option('id') or task_get_option('collection'):
            print >> sys.stderr, "ERROR: You can use --reindex only when adding modified record."
            return False
    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """ Given the string key it checks it's meaning, eventually using the
    value. Usually it fills some key in the options dict.
    It must return True if it has elaborated the key, False, if it doesn't
    know that key.
    eg:
    if key in ['-n', '--number']:
        self.options['number'] = value
        return True
    return False
    """
    if key in ("-a", "--add"):
        task_set_option("cmd", "add")
        # BUGFIX: the delete switch is -d, not -x ('x' is not even in the
        # getopt spec), so this incompatibility check could never trigger
        if ("-d", "") in opts or ("--del", "") in opts:
            raise StandardError("Can not have --add and --del at the same time!")
    elif key in ("-k", "--check"):
        task_set_option("cmd", "check")
    elif key in ("-r", "--repair"):
        task_set_option("cmd", "repair")
    elif key in ("-d", "--del"):
        task_set_option("cmd", "del")
    elif key in ("-i", "--id"):
        # accumulate recID ranges across repeated -i options
        task_set_option('id', task_get_option('id') + split_ranges(value))
    elif key in ("-m", "--modified"):
        task_set_option("modified", get_date_range(value))
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-R", "--reindex"):
        task_set_option("reindex", True)
    elif key in ("-w", "--windex"):
        task_set_option("windex", value)
    elif key in ("-M", "--maxmem"):
        task_set_option("maxmem", int(value))
        # refuse limits that would not even fit the base process size
        if task_get_option("maxmem") < base_process_size + 1000:
            raise StandardError("Memory usage should be higher than %d kB" % \
                (base_process_size + 1000))
    elif key in ("-f", "--flush"):
        task_set_option("flush", int(value))
    elif key in ("-o", "--force"):
        task_set_option("force", True)
    elif key in ("-Z", "--remove-dependent-index",):
        task_set_option("remove-dependent-index", value)
    else:
        return False
    return True
def task_stop_table_close_fnc():
    """Flush the last open word table to the database before the task
    is stopped, so no in-memory updates are lost."""
    global _last_word_table
    open_table = _last_word_table
    if open_table:
        open_table.put_into_db()
def get_recIDs_by_date_bibliographic(dates, index_name, force_all=False):
    """ Finds records that were modified between DATES[0] and DATES[1]
        for given index.
        If DATES is not set, then finds records that were modified since
        the last update of the index.
        @param dates: (date_from, date_to) pair; either bound may be None
        @param index_name: name of the index to consider
        @param force_all: if True, take all records regardless of the
            index's recorded last-update time
    """
    index_id = get_index_id_from_index_name(index_name)
    if not dates:
        # no explicit dates given: start from the index's last update time
        query = """SELECT last_updated FROM idxINDEX WHERE id=%s"""
        res = run_sql(query, (index_id,))
        if not res:
            return set([])
        if not res[0][0] or force_all:
            # index never updated (or full run forced): select everything
            dates = ("0000-00-00", None)
        else:
            dates = (res[0][0], None)

    if dates[1] is None:
        # open upper bound: records modified on/after dates[0]
        res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s""",
                                (dates[0],)))
        if index_name == 'fulltext':
            # also pick up documents whose text extraction predates their
            # last modification (text needs re-extraction)
            res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id
                                        WHERE text_extraction_date <= modification_date AND
                                              modification_date >= %s
                                              AND status<>'DELETED'""",
                                     (dates[0],)))
    elif dates[0] is None:
        # open lower bound: records modified on/before dates[1]
        res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b WHERE b.modification_date <= %s""",
                                (dates[1],)))
        if index_name == 'fulltext':
            res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id
                                        WHERE text_extraction_date <= modification_date
                                              AND modification_date <= %s
                                              AND status<>'DELETED'""",
                                     (dates[1],)))
    else:
        # closed interval [dates[0], dates[1]]
        res = intbitset(run_sql("""SELECT b.id FROM bibrec AS b
                                   WHERE b.modification_date >= %s AND
                                         b.modification_date <= %s""",
                                (dates[0], dates[1])))
        if index_name == 'fulltext':
            res |= intbitset(run_sql("""SELECT id_bibrec FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id
                                        WHERE text_extraction_date <= modification_date AND
                                              modification_date >= %s AND
                                              modification_date <= %s AND
                                              status<>'DELETED'""",
                                     (dates[0], dates[1],)))
    # special case of author indexes where we need to re-index
    # those records that were affected by changed BibAuthorID attributions:
    if index_name in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor'):
        from invenio.bibauthorid_personid_maintenance import get_recids_affected_since
        # dates[1] is ignored, since BibAuthorID API does not offer upper limit search
        rec_list_author = intbitset(get_recids_affected_since(dates[0]))
        res = res | rec_list_author
    return set(res)
def get_recIDs_by_date_authority(dates, index_name, force_all=False):
    """ Finds records that were modified between DATES[0] and DATES[1]
        for given index.
        If DATES is not set, then finds records that were modified since
        the last update of the index.
        Searches for bibliographic records connected to authority records
        that have been changed.
        @param dates: (date_from, date_to) pair; dates[1] is ignored here
        @param index_name: name of the index to consider
        @param force_all: if True, consider all records since the epoch
    """
    index_id = get_index_id_from_index_name(index_name)
    index_tags = get_index_tags(index_name)
    if not dates:
        # no explicit dates given: start from the index's last update time
        query = """SELECT last_updated FROM idxINDEX WHERE id=%s"""
        res = run_sql(query, (index_id,))
        if not res:
            return set([])
        if not res[0][0] or force_all:
            dates = ("0000-00-00", None)
        else:
            dates = (res[0][0], None)
    res = intbitset()
    for tag in index_tags:
        # index tags may use MARC-style '%' wildcards; translate them to
        # fnmatch-style '*' to match the configured controlled fields
        pattern = tag.replace('%', '*')
        matches = fnmatch.filter(CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.keys(), pattern)
        if not len(matches):
            continue
        for tag_match in matches:
            # get the type of authority record associated with this field
            auth_type = CFG_BIBAUTHORITY_CONTROLLED_FIELDS_BIBLIOGRAPHIC.get(tag_match)
            # find updated authority records of this type
            # dates[1] is ignored, needs dates[0] to find res
            now = datetime.now()
            auth_recIDs = search_pattern(p='980__a:' + auth_type) \
                & search_unit_in_bibrec(str(dates[0]), str(now), type='m')
            # now find dependent bibliographic records
            for auth_recID in auth_recIDs:
                # get the fix authority identifier of this authority record
                control_nos = get_control_nos_from_recID(auth_recID)
                # there may be multiple control number entries! (the '035' field is repeatable!)
                for control_no in control_nos:
                    # get the bibrec IDs that refer to AUTHORITY_ID in TAG
                    tag_0 = tag_match[:5] + '0'  # possibly do the same for '4' subfields ?
                    fieldvalue = '"' + control_no + '"'
                    res |= search_pattern(p=tag_0 + ':' + fieldvalue)
    return set(res)
def get_not_updated_recIDs(modified_dates, indexes, force_all=False):
    """Finds not updated recIDs in database for indexes.
    @param modified_dates: between these dates we should look for modified records
    @type modified_dates: [date_old, date_new]
    @param indexes: names of the indexes to consider
    @type indexes: list of strings
    @param force_all: if True all records will be taken
    @return: sorted list of recIDs needing (re)indexing
    """
    found_recIDs = set()
    write_message(CFG_BIBINDEX_UPDATE_MESSAGE)
    for index in indexes:
        # union of bibliographic and authority-driven modifications
        found_recIDs |= get_recIDs_by_date_bibliographic(modified_dates, index, force_all)
        found_recIDs |= get_recIDs_by_date_authority(modified_dates, index, force_all)
    # sorted() already returns a list -- the former list(sorted(...)) was redundant
    return sorted(found_recIDs)
def get_recIDs_from_cli(indexes=None):
    """
    Gets recIDs ranges from CLI for indexing when
    user specified 'id' or 'collection' option or
    search for modified recIDs for provided indexes
    when recIDs are not specified.
    @param indexes: list of specified indexes, which
           can be obtained from CLI with use of:
           get_indexes_from_cli() function.
           Defaults to an empty list.
    @type indexes: list of strings
    """
    # BUGFIX: the default used to be a shared mutable list ([]);
    # normalize the None sentinel here instead
    if indexes is None:
        indexes = []
    # need to first update idxINDEX table to find proper recIDs for reindexing
    if task_get_option("reindex"):
        for index_name in indexes:
            run_sql("""UPDATE idxINDEX SET last_updated='0000-00-00 00:00:00'
                       WHERE name=%s""", (index_name,))

    if task_get_option("id"):
        return task_get_option("id")
    elif task_get_option("collection"):
        l_of_colls = task_get_option("collection").split(",")
        recIDs = perform_request_search(c=l_of_colls)
        # each recID becomes its own one-element range
        return [[recID, recID] for recID in recIDs]
    elif task_get_option("cmd") == "add":
        recs = get_not_updated_recIDs(task_get_option("modified"),
                                      indexes,
                                      task_get_option("force"))
        recIDs_range = beautify_range_list(create_range_list(recs))
        return recIDs_range
    return []
def get_indexes_from_cli():
    """
    Return the list of valid index names selected on the command line
    (-w/--windex); when none were specified, fall back to every known
    index.  Non-existent names are filtered out.
    """
    chosen = task_get_option("windex")
    if chosen:
        index_names = chosen.split(",")
    else:
        index_names = get_all_indexes()
    return remove_inexistent_indexes(index_names, leave_virtual=True)
def remove_dependent_index(virtual_indexes, dependent_index):
    """
    Removes dependent index from virtual indexes.
    @param virtual_indexes: names of virtual_indexes
    @type virtual_indexes: list of strings
    @param dependent_index: name of dependent index
    @type dependent_index: string
    """
    if not virtual_indexes:
        write_message("You should specify a name of a virtual index...")
        # BUGFIX: bail out here -- the function used to fall through and
        # keep working with an empty index list after the warning
        return
    id_dependent = get_index_id_from_index_name(dependent_index)
    wordTables = get_word_tables(virtual_indexes)
    # the three forward tables are handled identically except for the
    # table name pattern and the table type key
    # NOTE(review): the PHRASE table historically used the "Words" table
    # type here (task_run_core uses "Phrases") -- kept as-is, confirm
    table_specs = [('idxWORD%02dF', "Words"),
                   ('idxPAIR%02dF', "Pairs"),
                   ('idxPHRASE%02dF', "Words")]
    for index_id, index_name, index_tags in wordTables:
        for spec_nr, (pattern, table_type) in enumerate(table_specs):
            if spec_nr:
                # sleep between tables, as the original code did
                task_sleep_now_if_required()
            wordTable = WordTable(index_name=index_name,
                                  index_id=index_id,
                                  fields_to_index=index_tags,
                                  table_name_pattern=pattern,
                                  wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE[table_type],
                                  tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
                                  wash_index_terms=50)
            wordTable.remove_dependent_index(id_dependent)
            wordTable.report_on_table_consistency()
        # finally drop the virtual->dependent mapping for this index
        query = """DELETE FROM idxINDEX_idxINDEX WHERE id_virtual=%s AND id_normal=%s"""
        run_sql(query, (index_id, id_dependent))
def task_run_core():
"""Runs the task by fetching arguments from the BibSched task queue.
This is what BibSched will be invoking via daemon call.
"""
global _last_word_table
indexes = get_indexes_from_cli()
if len(indexes) == 0:
write_message("Specified indexes can't be found.")
return True
# check tables consistency
if task_get_option("cmd") == "check":
wordTables = get_word_tables(indexes)
for index_id, index_name, index_tags in wordTables:
wordTable = WordTable(index_name=index_name,
index_id=index_id,
fields_to_index=index_tags,
table_name_pattern='idxWORD%02dF',
wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
tag_to_tokenizer_map={'8564_u': "BibIndexFulltextTokenizer"},
wash_index_terms=50)
_last_word_table = wordTable
wordTable.report_on_table_consistency()
task_sleep_now_if_required(can_stop_too=True)
wordTable = WordTable(index_name=index_name,
index_id=index_id,
fields_to_index=index_tags,
table_name_pattern='idxPAIR%02dF',
wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Pairs"],
tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
wash_index_terms=100)
_last_word_table = wordTable
wordTable.report_on_table_consistency()
task_sleep_now_if_required(can_stop_too=True)
wordTable = WordTable(index_name=index_name,
index_id=index_id,
fields_to_index=index_tags,
table_name_pattern='idxPHRASE%02dF',
wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Phrases"],
tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
wash_index_terms=0)
_last_word_table = wordTable
wordTable.report_on_table_consistency()
task_sleep_now_if_required(can_stop_too=True)
_last_word_table = None
return True
#virtual index: remove dependent index
if task_get_option("remove-dependent-index"):
remove_dependent_index(indexes,
task_get_option("remove-dependent-index"))
return True
#initialization for Words,Pairs,Phrases
recIDs_range = get_recIDs_from_cli(indexes)
recIDs_for_index = find_affected_records_for_index(indexes,
recIDs_range,
(task_get_option("force") or \
task_get_option("reindex") or \
task_get_option("cmd") == "del"))
wordTables = get_word_tables(recIDs_for_index.keys())
if not wordTables:
write_message("Selected indexes/recIDs are up to date.")
# Let's work on single words!
for index_id, index_name, index_tags in wordTables:
reindex_prefix = ""
if task_get_option("reindex"):
reindex_prefix = "tmp_"
init_temporary_reindex_tables(index_id, reindex_prefix)
wordTable = WordTable(index_name=index_name,
index_id=index_id,
fields_to_index=index_tags,
table_name_pattern=reindex_prefix + 'idxWORD%02dF',
wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Words"],
tag_to_tokenizer_map={'8564_u': "BibIndexFulltextTokenizer"},
wash_index_terms=50)
_last_word_table = wordTable
wordTable.report_on_table_consistency()
try:
if task_get_option("cmd") == "del":
if task_get_option("id") or task_get_option("collection"):
wordTable.del_recIDs(recIDs_range)
task_sleep_now_if_required(can_stop_too=True)
else:
error_message = "Missing IDs of records to delete from " \
"index %s." % wordTable.tablename
write_message(error_message, stream=sys.stderr)
raise StandardError(error_message)
elif task_get_option("cmd") == "add":
final_recIDs = beautify_range_list(create_range_list(recIDs_for_index[index_name]))
wordTable.add_recIDs(final_recIDs, task_get_option("flush"))
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("cmd") == "repair":
wordTable.repair(task_get_option("flush"))
task_sleep_now_if_required(can_stop_too=True)
else:
error_message = "Invalid command found processing %s" % \
wordTable.tablename
write_message(error_message, stream=sys.stderr)
raise StandardError(error_message)
except StandardError, e:
write_message("Exception caught: %s" % e, sys.stderr)
register_exception(alert_admin=True)
if _last_word_table:
_last_word_table.put_into_db()
raise
wordTable.report_on_table_consistency()
task_sleep_now_if_required(can_stop_too=True)
# Let's work on pairs now
wordTable = WordTable(index_name=index_name,
index_id=index_id,
fields_to_index=index_tags,
table_name_pattern=reindex_prefix + 'idxPAIR%02dF',
wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Pairs"],
tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
wash_index_terms=100)
_last_word_table = wordTable
wordTable.report_on_table_consistency()
try:
if task_get_option("cmd") == "del":
if task_get_option("id") or task_get_option("collection"):
wordTable.del_recIDs(recIDs_range)
task_sleep_now_if_required(can_stop_too=True)
else:
error_message = "Missing IDs of records to delete from " \
"index %s." % wordTable.tablename
write_message(error_message, stream=sys.stderr)
raise StandardError(error_message)
elif task_get_option("cmd") == "add":
final_recIDs = beautify_range_list(create_range_list(recIDs_for_index[index_name]))
wordTable.add_recIDs(final_recIDs, task_get_option("flush"))
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("cmd") == "repair":
wordTable.repair(task_get_option("flush"))
task_sleep_now_if_required(can_stop_too=True)
else:
error_message = "Invalid command found processing %s" % \
wordTable.tablename
write_message(error_message, stream=sys.stderr)
raise StandardError(error_message)
except StandardError, e:
write_message("Exception caught: %s" % e, sys.stderr)
register_exception()
if _last_word_table:
_last_word_table.put_into_db()
raise
wordTable.report_on_table_consistency()
task_sleep_now_if_required(can_stop_too=True)
# Let's work on phrases now
wordTable = WordTable(index_name=index_name,
index_id=index_id,
fields_to_index=index_tags,
table_name_pattern=reindex_prefix + 'idxPHRASE%02dF',
wordtable_type=CFG_BIBINDEX_INDEX_TABLE_TYPE["Phrases"],
tag_to_tokenizer_map={'8564_u': "BibIndexEmptyTokenizer"},
wash_index_terms=0)
_last_word_table = wordTable
wordTable.report_on_table_consistency()
try:
if task_get_option("cmd") == "del":
if task_get_option("id") or task_get_option("collection"):
wordTable.del_recIDs(recIDs_range)
task_sleep_now_if_required(can_stop_too=True)
else:
error_message = "Missing IDs of records to delete from " \
"index %s." % wordTable.tablename
write_message(error_message, stream=sys.stderr)
raise StandardError(error_message)
elif task_get_option("cmd") == "add":
final_recIDs = beautify_range_list(create_range_list(recIDs_for_index[index_name]))
wordTable.add_recIDs(final_recIDs, task_get_option("flush"))
if not task_get_option("id") and not task_get_option("collection"):
update_index_last_updated([index_name], task_get_task_param('task_starting_time'))
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("cmd") == "repair":
wordTable.repair(task_get_option("flush"))
task_sleep_now_if_required(can_stop_too=True)
else:
error_message = "Invalid command found processing %s" % \
wordTable.tablename
write_message(error_message, stream=sys.stderr)
raise StandardError(error_message)
except StandardError, e:
write_message("Exception caught: %s" % e, sys.stderr)
register_exception()
if _last_word_table:
_last_word_table.put_into_db()
raise
wordTable.report_on_table_consistency()
task_sleep_now_if_required(can_stop_too=True)
if task_get_option("reindex"):
swap_temporary_reindex_tables(index_id, reindex_prefix)
update_index_last_updated([index_name], task_get_task_param('task_starting_time'))
task_sleep_now_if_required(can_stop_too=True)
# update modification date also for indexes that were up to date
if not task_get_option("id") and not task_get_option("collection") and \
task_get_option("cmd") == "add":
up_to_date = set(indexes) - set(recIDs_for_index.keys())
update_index_last_updated(list(up_to_date), task_get_task_param('task_starting_time'))
_last_word_table = None
return True
### okay, here we go:
if __name__ == '__main__':
    # script entry point: hand control to the BibSched task framework
    main()
diff --git a/invenio/legacy/bibrank/bridge_config.py b/invenio/legacy/bibrank/bridge_config.py
index 8311ccc91..082d8cf1f 100644
--- a/invenio/legacy/bibrank/bridge_config.py
+++ b/invenio/legacy/bibrank/bridge_config.py
@@ -1,72 +1,72 @@
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
from invenio.config import CFG_ETCDIR
from invenio.modules.formatter.api import get_tag_from_name
-from invenio.errorlib import raise_exception
+from invenio.ext.logging import raise_exception
CFG_BIBRANK_WRD_CFG_PATH = '%s/bibrank/wrd.cfg' % CFG_ETCDIR
def alert_admin(name):
    """Alert the administrator that no MARC tag is defined for *name*
    (raised and registered via the error-library helper)."""
    raise_exception(ValueError, 'No marc tag for %s defined' % name, alert_admin=True)
# Resolve the MARC tags used by the word-similarity ranker from the tag
# names configured in the database; when a name is not configured, fall
# back to the conventional MARC tag and alert the administrator.

# abstract:
marc_tag_abstract = get_tag_from_name('abstract')
if marc_tag_abstract:
    CFG_MARC_ABSTRACT = marc_tag_abstract
else:
    CFG_MARC_ABSTRACT = '520__a'
    alert_admin('abstract')

# author name:
marc_tag_author = get_tag_from_name('first author name')
if marc_tag_author:
    CFG_MARC_AUTHOR_NAME = marc_tag_author
else:
    CFG_MARC_AUTHOR_NAME = '100__a'
    alert_admin('first author name')

# additional author name:
marc_tag_contributor_name = get_tag_from_name('additional author name')
if marc_tag_contributor_name:
    CFG_MARC_ADDITIONAL_AUTHOR_NAME = marc_tag_contributor_name
else:
    CFG_MARC_ADDITIONAL_AUTHOR_NAME = '700__a'
    alert_admin('additional author name')

# keyword:
marc_tag_keyword = get_tag_from_name('keyword')
if marc_tag_keyword:
    CFG_MARC_KEYWORD = marc_tag_keyword
else:
    CFG_MARC_KEYWORD = '6531_a'
    alert_admin('keyword')

# title:
marc_tag_title = get_tag_from_name('title')
if marc_tag_title:
    CFG_MARC_TITLE = marc_tag_title
else:
    CFG_MARC_TITLE = '245__a'
    alert_admin('title')
diff --git a/invenio/legacy/bibrank/citation_indexer.py b/invenio/legacy/bibrank/citation_indexer.py
index ae80931f9..30b52974f 100644
--- a/invenio/legacy/bibrank/citation_indexer.py
+++ b/invenio/legacy/bibrank/citation_indexer.py
@@ -1,1017 +1,1017 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
import re
import time
import os
import sys
import ConfigParser
from itertools import islice
from datetime import datetime
from invenio.dbquery import run_sql, serialize_via_marshal, \
deserialize_via_marshal
from invenio.bibindex_tokenizers.BibIndexJournalTokenizer import \
CFG_JOURNAL_PUBINFO_STANDARD_FORM, \
CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK
from invenio.search_engine import search_pattern, search_unit
from invenio.search_engine_utils import get_fieldvalues
from invenio.modules.formatter.utils import parse_tag
from invenio.bibknowledge import get_kb_mappings
from invenio.bibtask import write_message, task_get_option, \
task_update_progress, task_sleep_now_if_required, \
task_get_task_param
-from invenio.errorlib import register_exception
-from invenio.bibindex_engine_utils import get_field_tags
+from invenio.ext.logging import register_exception
+from invenio.bibindex_engine import get_field_tags
INTBITSET_OF_DELETED_RECORDS = search_unit(p='DELETED', f='980', m='a')
re_CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK = re.compile(CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK)
def get_recids_matching_query(p, f, m='e'):
    """Return set of recIDs matching query for pattern p in field f,
    with records flagged as deleted filtered out."""
    matching = search_pattern(p=p, f=f, m=m)
    return matching - INTBITSET_OF_DELETED_RECORDS
def _summarize_recids(recids):
    """Return a log-friendly string for a recid list, abbreviating lists
    longer than 10000 elements to their first and last 10 entries."""
    if len(recids) > 10000:
        return str(recids[:10]) + ' ... ' + str(recids[-10:])
    return str(recids)


def get_citation_weight(rank_method_code, config, chunk_size=20000):
    """return a dictionary which is used by bibrank daemon for generating
    the index of sorted research results by citation information

    REFACTORING: the duplicated recid-list truncation logic of the two
    branches is now shared via _summarize_recids().
    @return: (cites_weight dict, index_update_time or None)
    """
    begin_time = time.time()
    quick = task_get_option("quick") != "no"

    # id option forces re-indexing a certain range
    # even if there are no new recs
    if task_get_option("id"):
        # construct a range of records to index
        updated_recids = []
        for first, last in task_get_option("id"):
            updated_recids += range(first, last+1)
        write_message('Records to process: %s' % _summarize_recids(updated_recids))
        index_update_time = None
    else:
        bibrank_update_time = get_bibrankmethod_lastupdate(rank_method_code)
        if not quick:
            # full run: start from the epoch, reprocessing everything
            bibrank_update_time = "0000-00-00 00:00:00"
        write_message("bibrank: %s" % bibrank_update_time)
        index_update_time = get_bibindex_update_time()
        write_message("bibindex: %s" % index_update_time)
        if index_update_time > datetime.now().strftime("%Y-%m-%d %H:%M:%S"):
            # guard against timestamps from the future
            index_update_time = "0000-00-00 00:00:00"
        updated_recids = get_modified_recs(bibrank_update_time,
                                           index_update_time)
        write_message("%s records to update" % _summarize_recids(updated_recids))

    if updated_recids:
        # result_intermediate should be warranted to exists!
        # but if the user entered a "-R" (do all) option, we need to
        # make an empty start set
        if quick:
            dicts = {
                'cites_weight': last_updated_result(rank_method_code),
                'cites': get_cit_dict("citationdict"),
                'refs': get_cit_dict("reversedict"),
                'selfcites': get_cit_dict("selfcitdict"),
                'selfrefs': get_cit_dict("selfcitedbydict"),
                'authorcites': get_initial_author_dict(),
            }
        else:
            dicts = {
                'cites_weight': {},
                'cites': {},
                'refs': {},
                'selfcites': {},
                'selfrefs': {},
                'authorcites': {},
            }

        # Process fully the updated records
        process_and_store(updated_recids, config, dicts, chunk_size, quick)

        end_time = time.time()
        write_message("Total time of get_citation_weight(): %.2f sec" % \
                      (end_time - begin_time))
        task_update_progress("citation analysis done")
        cites_weight = dicts['cites_weight']
    else:
        cites_weight = {}
        write_message("No new records added since last time this " \
                      "rank method was executed")

    return cites_weight, index_update_time
def process_and_store(recids, config, dicts, chunk_size, quick):
    """Run the citation analysis over *recids* in fixed-size chunks.

    Newest records are handled first: the older ones were most likely
    queued by earlier steps for reprocessing and only carry minor
    changes.  Chunking keeps memory consumption bounded.
    """
    remaining = iter(sorted(recids, reverse=True))
    while True:
        task_sleep_now_if_required()
        batch = list(islice(remaining, chunk_size))
        if not batch:
            # All chunks processed; in non-quick mode persist only once,
            # at the very end.
            if not quick:
                store_dicts(dicts)
            break
        write_message("Processing chunk #%s to #%s" % (batch[0], batch[-1]))
        # dicts are updated in-place by process_chunk
        process_chunk(batch, config, dicts)
        if quick:
            # Quick mode is an incremental update, not a rebuild from
            # scratch, so partial results are persisted after each chunk.
            store_dicts(dicts)
def process_chunk(recids, config, dicts):
    """Analyze one chunk of records and prune stale citation links.

    process_inner() only ADDS links, so after it runs we compare against
    a snapshot taken beforehand to remove links that disappeared.
    """
    cites_weight = dicts['cites_weight']
    cites = dicts['cites']
    refs = dicts['refs']

    # Snapshot the chunk's citation/reference state before the analysis.
    old_refs = dict((recid, set(refs.get(recid, []))) for recid in recids)
    old_cites = dict((recid, set(cites.get(recid, []))) for recid in recids)

    process_inner(recids, config, dicts)

    # Records cited by this chunk can only LOSE references here: added
    # references were already written into the dicts by process_inner.
    for cited in recids:
        for citer in old_cites[cited] - set(cites.get(cited, [])):
            refs[citer] = list(set(refs.get(citer, [])) - set([cited]))
            if not refs[citer]:
                del refs[citer]

    # Records referenced by this chunk can only LOSE citations, for the
    # same reason.
    for citer in recids:
        for cited in old_refs[citer] - set(refs.get(citer, [])):
            cites[cited] = list(set(cites.get(cited, [])) - set([citer]))
            cites_weight[cited] = len(cites[cited])
            if not cites[cited]:
                del cites[cited]
                del cites_weight[cited]
def process_inner(recids, config, dicts, do_catchup=True):
    """Extract citation data for *recids* and feed it to the analyzer."""
    tags = get_tags_config(config)

    # First pass: read the citation/reference fields of the updated
    # records (and nothing else).
    write_message("Entering get_citation_informations", verbose=9)
    citation_informations = get_citation_informations(
        recids, tags, fetch_catchup_info=do_catchup)

    # Second pass: match the extracted information against the rest of
    # the collection to resolve who cites whom.
    write_message("Entering ref_analyzer", verbose=9)
    return ref_analyzer(citation_informations, dicts, recids, tags,
                        do_catchup=do_catchup)
def get_bibrankmethod_lastupdate(rank_method_code):
    """Return the last execution date of the given bibrank method.

    Falls back to the "0000-00-00 00:00:00" sentinel when the method has
    no row in rnkMETHOD (i.e. it was never run).
    """
    rows = run_sql("""SELECT DATE_FORMAT(last_updated, '%%Y-%%m-%%d %%H:%%i:%%s')
                      FROM rnkMETHOD WHERE name =%s""", [rank_method_code])
    if rows:
        return rows[0][0]
    return "0000-00-00 00:00:00"
def get_bibindex_update_time():
    """Return the older of the 'journal'/'reportnumber' index timestamps.

    Only records indexed by BOTH indexes should be considered, hence the
    MIN().  Returns the zero-date sentinel (so nothing gets selected)
    when the indexes do not exist yet.
    """
    try:
        # check indexing times of `journal' and `reportnumber` indexes,
        # and only fetch records which have been indexed
        rows = run_sql("SELECT DATE_FORMAT(MIN(last_updated), "
                       "'%%Y-%%m-%%d %%H:%%i:%%s') FROM idxINDEX WHERE name IN (%s,%s)",
                       ('journal', 'reportnumber'), 1)
        return rows[0][0]
    except IndexError:
        write_message("Not running citation indexer since journal/reportnumber"
                      " indexes are not created yet.")
        return "0000-00-00 00:00:00"
def get_modified_recs(bibrank_method_lastupdate, indexes_lastupdate):
    """Get records to be updated by bibrank indexing.

    Return the list of record ids modified between the last execution of
    the bibrank method and the latest journal/report index update, in
    ascending id order.
    """
    rows = run_sql("""SELECT id FROM bibrec
                      WHERE modification_date >= %s
                      AND modification_date < %s
                      ORDER BY id ASC""",
                   (bibrank_method_lastupdate, indexes_lastupdate))
    return [row[0] for row in rows]
def last_updated_result(rank_method_code):
    """Return the stored weights dictionary for *rank_method_code*.

    Reads the marshal-serialized relevance_data blob from rnkMETHODDATA.
    Returns an empty dict when the method has no stored data yet.
    """
    query = """SELECT relevance_data FROM rnkMETHOD, rnkMETHODDATA WHERE
               rnkMETHOD.id = rnkMETHODDATA.id_rnkMETHOD
               AND rnkMETHOD.Name = %s"""
    try:
        # Pass the method name as a bound parameter: the previous
        # string-interpolated "'%s'" % rank_method_code form bypassed the
        # driver's quoting and was open to SQL injection.
        serialized = run_sql(query, (rank_method_code,))[0][0]
    except IndexError:
        # No row: this rank method has never stored any data.
        dic = {}
    else:
        dic = deserialize_via_marshal(serialized)
    return dic
def format_journal(format_string, mappings):
    """Format the publication info string according to *format_string*.

    Every character of *format_string* that is a key of *mappings* is
    replaced by its mapped value; all other characters pass through
    verbatim.
    """
    pieces = []
    for char in format_string:
        pieces.append(mappings.get(char, char))
    return ''.join(pieces)
def get_tags_config(config):
    """Fetch the MARC tag configuration needed by the citation indexer.

    Reads the rank-method section of *config* and returns a dict of
    tagified MARC tags (or None for options that are not configured).
    """
    # Probably "citation" unless this file gets renamed
    function = config.get("rank_method", "function")
    write_message("config function %s" % function, verbose=9)

    def tag_or_none(option):
        """Return the tagified MARC tag for *option*, or None if unset."""
        try:
            tag = config.get(function, option)
        except ConfigParser.NoOptionError:
            return None
        return tagify(parse_tag(tag))

    tags = {
        # 037a: contains (often) the "hep-ph/0501084" tag of THIS record
        'record_pri_number': tag_or_none("primary_report_number"),
        # 088a: additional short identifier for the record
        'record_add_number': tag_or_none("additional_report_number"),
        # 999C5r: in the reference list, refers to other records.
        # Looks like: hep-ph/0408002
        'refs_report_number': tag_or_none("reference_via_report_number"),
        # 999C5s: in the reference list, refers to other records.
        # Looks like: Phys.Rev.,A21,78
        'refs_journal': tag_or_none("reference_via_pubinfo"),
        # 999C5a: in the reference list, refers to other records.
        # Looks like: 10.1007/BF03170733
        'refs_doi': tag_or_none("reference_via_doi"),
    }

    # Fields needed to construct the journals for this record
    try:
        tag = {
            'pages': config.get(function, "pubinfo_journal_page"),
            'year': config.get(function, "pubinfo_journal_year"),
            'journal': config.get(function, "pubinfo_journal_title"),
            'volume': config.get(function, "pubinfo_journal_volume"),
        }
    except ConfigParser.NoOptionError:
        tags['publication'] = None
    else:
        tags['publication'] = {
            'pages': tagify(parse_tag(tag['pages'])),
            'year': tagify(parse_tag(tag['year'])),
            'journal': tagify(parse_tag(tag['journal'])),
            'volume': tagify(parse_tag(tag['volume'])),
        }

    # Fields needed to lookup the DOIs
    tags['doi'] = get_field_tags('doi')

    # 999C5s. A standardized way of writing a reference in the reference list.
    # Like: Nucl. Phys. B 710 (2000) 371
    try:
        tags['publication_format'] = config.get(function,
                                                "pubinfo_journal_format")
    except ConfigParser.NoOptionError:
        tags['publication_format'] = CFG_JOURNAL_PUBINFO_STANDARD_FORM

    # Print values of tags for debugging
    write_message("tag values: %r" % [tags], verbose=9)
    return tags
def get_journal_info(recid, tags):
    """Build the list of formatted publication strings for *recid*.

    Returns the formatted pubinfo plus variants: one with the alternative
    volume spelling (e.g. "B21" vs "21B") and one per CODEN abbreviation
    of the journal title.
    """
    record_info = []

    # Single-letter placeholders for the journal format string:
    # p -> journal title, v -> volume, y -> year, c -> starting page
    # TODO: handle records with multiple journals
    tagsvalues = {}

    journal_values = get_fieldvalues(recid, tags['publication']['journal'])
    if journal_values:
        tagsvalues["p"] = journal_values[0]

    volume_values = get_fieldvalues(recid, tags['publication']['volume'])
    if volume_values:
        tagsvalues["v"] = volume_values[0]

    year_values = get_fieldvalues(recid, tags['publication']['year'])
    if year_values:
        tagsvalues["y"] = year_values[0]

    page_values = get_fieldvalues(recid, tags['publication']['pages'])
    if page_values:
        # if the page numbers have the form "x-y", keep only x
        pages = page_values[0]
        hyphen_at = pages.find("-")
        if hyphen_at > 0:
            pages = pages[:hyphen_at]
        tagsvalues["c"] = pages

    # Only format if every placeholder required by the format is known.
    ok = True
    for char in tags['publication_format']:
        if char in ('p', 'v', 'y', 'c') and char not in tagsvalues:
            ok = False

    if ok:
        record_info.append(format_journal(tags['publication_format'],
                                          tagsvalues))

        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            with_alt_volume = tagsvalues.copy()
            with_alt_volume['v'] = alt_volume
            record_info.append(format_journal(tags['publication_format'],
                                              with_alt_volume))

        # Add variants using CODEN abbreviations of the journal title
        for coden in get_kb_mappings('CODENS', value=tagsvalues['p']):
            with_coden = tagsvalues.copy()
            with_coden['p'] = coden['key']
            record_info.append(format_journal(tags['publication_format'],
                                              with_coden))

    return record_info
def get_alt_volume(volume):
    """Return the alternative spelling of a journal volume, or None.

    A leading letter moves to the back ("A21" -> "21A") and a trailing
    letter moves to the front ("21A" -> "A21").  Volumes matching neither
    pattern yield None.
    """
    if re.match(r'[a-zA-Z]\d+', volume, re.U | re.I):
        return volume[1:] + volume[0]
    if re.match(r'\d+[a-zA-Z]', volume, re.U | re.I):
        return volume[-1] + volume[:-1]
    return None
def get_citation_informations(recid_list, tags, fetch_catchup_info=True):
    """Scan the records in *recid_list* for reference fields (999C5x)
    and citation-relevant metadata.

    Returns a pair (records_info, references_info), each a dict with
    keys 'report-numbers', 'journals' and 'doi' mapping
    recid -> list of values found in the record, e.g.:
        references_info['report-numbers'] == {93: ['astro-ph/9812088']}
        references_info['journals'] == {93: ['Phys. Rev. Lett. 96 (2006) 081301']}
    When *fetch_catchup_info* is False only references_info is filled
    (records_info stays empty).
    NB: stuff here is for analysing new or changed records.
    see "ref_analyzer" for more.
    """
    # os.times()[4] is elapsed wall-clock time since a fixed point
    begin_time = os.times()[4]

    records_info = {
        'report-numbers': {},
        'journals': {},
        'doi': {},
    }

    references_info = {
        'report-numbers': {},
        'journals': {},
        'doi': {},
    }

    # perform quick check to see if there are some records with
    # reference tags, because otherwise get.cit.inf would be slow even
    # if there is nothing to index:
    if run_sql("SELECT value FROM bib%sx WHERE tag=%%s LIMIT 1" % tags['refs_journal'][0:2],
               (tags['refs_journal'], )) or \
       run_sql("SELECT value FROM bib%sx WHERE tag=%%s LIMIT 1" % tags['refs_report_number'][0:2],
               (tags['refs_report_number'], )):
        done = 0  # for status reporting
        for recid in recid_list:
            if done % 10 == 0:
                task_sleep_now_if_required()
                # in fact we can sleep any time here

            if done % 1000 == 0:
                mesg = "get cit.inf done %s of %s" % (done, len(recid_list))
                write_message(mesg)
                task_update_progress(mesg)

            done += 1

            if recid in INTBITSET_OF_DELETED_RECORDS:
                # do not treat this record since it was deleted; we
                # skip it like this in case it was only soft-deleted
                # e.g. via bibedit (i.e. when collection tag 980 is
                # DELETED but other tags like report number or journal
                # publication info remained the same, so the calls to
                # get_fieldvalues() below would return old values)
                continue

            # --- references this record makes (999C5x) ---

            if tags['refs_report_number']:
                references_info['report-numbers'][recid] \
                        = get_fieldvalues(recid,
                                          tags['refs_report_number'],
                                          sort=False)
                msg = "references_info['report-numbers'][%s] = %r" \
                            % (recid, references_info['report-numbers'][recid])
                write_message(msg, verbose=9)

            if tags['refs_journal']:
                references_info['journals'][recid] = []
                for ref in get_fieldvalues(recid,
                                           tags['refs_journal'],
                                           sort=False):
                    try:
                        # Inspire specific parsing: "journal,volume,page"
                        journal, volume, page = ref.split(',')
                    except ValueError:
                        # not in the 3-component form: keep as-is below
                        pass
                    else:
                        # also register the alternative volume spelling
                        # (e.g. "B21" vs "21B") so both forms match
                        alt_volume = get_alt_volume(volume)
                        if alt_volume:
                            alt_ref = ','.join([journal, alt_volume, page])
                            references_info['journals'][recid] += [alt_ref]
                    references_info['journals'][recid] += [ref]

                msg = "references_info['journals'][%s] = %r" \
                            % (recid, references_info['journals'][recid])
                write_message(msg, verbose=9)

            if tags['refs_doi']:
                references_info['doi'][recid] \
                        = get_fieldvalues(recid, tags['refs_doi'], sort=False)
                msg = "references_info['doi'][%s] = %r" \
                            % (recid, references_info['doi'][recid])
                write_message(msg, verbose=9)

            if not fetch_catchup_info:
                # We do not need the extra info
                continue

            # --- this record's own identifiers (for catchup phases) ---

            if tags['record_pri_number'] or tags['record_add_number']:
                records_info['report-numbers'][recid] = []

                if tags['record_pri_number']:
                    records_info['report-numbers'][recid] \
                        += get_fieldvalues(recid,
                                           tags['record_pri_number'],
                                           sort=False)
                if tags['record_add_number']:
                    records_info['report-numbers'][recid] \
                        += get_fieldvalues(recid,
                                           tags['record_add_number'],
                                           sort=False)

                msg = "records_info[%s]['report-numbers'] = %r" \
                            % (recid, records_info['report-numbers'][recid])
                write_message(msg, verbose=9)

            if tags['doi']:
                records_info['doi'][recid] = []
                for tag in tags['doi']:
                    records_info['doi'][recid] += get_fieldvalues(recid,
                                                                  tag,
                                                                  sort=False)
                msg = "records_info[%s]['doi'] = %r" \
                            % (recid, records_info['doi'][recid])
                write_message(msg, verbose=9)

            # get a combination of
            # journal vol (year) pages
            if tags['publication']:
                records_info['journals'][recid] = get_journal_info(recid, tags)
                msg = "records_info[%s]['journals'] = %r" \
                            % (recid, records_info['journals'][recid])
                write_message(msg, verbose=9)

    else:
        mesg = "Warning: there are no records with tag values for " \
               "%s or %s. Nothing to do." % \
                            (tags['refs_journal'], tags['refs_report_number'])
        write_message(mesg)

    mesg = "get cit.inf done fully"
    write_message(mesg)
    task_update_progress(mesg)

    end_time = os.times()[4]
    write_message("Execution time for generating citation info "
                  "from record: %.2f sec" % (end_time - begin_time))

    return records_info, references_info
def standardize_report_number(report_number):
    """Normalize an arXiv report number to the "arXiv:NNNN.NNNN" form.

    Strips the bracketed category suffix (e.g. " [hep-ph]") and ensures
    the "arXiv:" prefix is present.  Non-arXiv report numbers are
    returned unchanged.
    """
    # BUGFIX: the flags used to be passed as re.sub()'s 4th positional
    # argument, which is `count`, not `flags` -- so IGNORECASE/UNICODE
    # were never applied.  Compile the pattern with the flags instead.
    pattern = re.compile(r'(?:arXiv:)?(\d{4}\.\d{4}) \[[a-zA-Z.-]+\]',
                         re.I | re.U)
    return pattern.sub(r'arXiv:\g<1>', report_number)
def ref_analyzer(citation_informations, dicts,
                 updated_recids, tags, do_catchup=True):
    """Analyze the citation informations and calculate the citation weight
    and cited by list dictionary.

    Works in phases: phases 1-3 resolve the references MADE BY the
    updated records (by report number, journal pubinfo, DOI); phases 4-6
    find OTHER records whose reference lists point at the updated
    records ("catchup"); phase 7 prunes empty entries.  All phases
    mutate the shared dictionaries in *dicts* in place.
    """
    citations_weight = dicts['cites_weight']
    citations = dicts['cites']      # cited recid -> list of citing recids
    references = dicts['refs']      # citing recid -> list of cited recids
    selfcites = dicts['selfcites']
    selfrefs = dicts['selfrefs']
    authorcites = dicts['authorcites']

    def step(msg_prefix, recid, done, total):
        # periodic progress reporting / cooperative sleeping
        if done % 30 == 0:
            task_sleep_now_if_required()

        if done % 1000 == 0:
            mesg = "%s done %s of %s" % (msg_prefix, done, total)
            write_message(mesg)
            task_update_progress(mesg)

        write_message("Processing: %s" % recid, verbose=9)

    def add_to_dicts(citer, cited):
        # Make sure we don't add ourselves
        # Workaround till we know why we are adding ourselves.
        if citer == cited:
            return
        if cited not in citations_weight:
            citations_weight[cited] = 0
        # Citations and citations weight
        if citer not in citations.setdefault(cited, []):
            citations[cited].append(citer)
            citations_weight[cited] += 1
        # References
        if cited not in references.setdefault(citer, []):
            references[citer].append(cited)

    # dict of recid -> institute_give_publ_id
    records_info, references_info = citation_informations

    t1 = os.times()[4]

    write_message("Phase 0: temporarily remove changed records from " \
                  "citation dictionaries; they will be filled later")
    if do_catchup:
        for somerecid in updated_recids:
            try:
                del citations[somerecid]
            except KeyError:
                pass

    for somerecid in updated_recids:
        try:
            del references[somerecid]
        except KeyError:
            pass

    # Try to find references based on 999C5r
    # e.g 8 -> ([astro-ph/9889],[hep-ph/768])
    # meaning: rec 8 contains these in bibliography
    write_message("Phase 1: Report numbers references")
    done = 0
    for thisrecid, refnumbers in references_info['report-numbers'].iteritems():
        step("Report numbers references", thisrecid, done,
                                        len(references_info['report-numbers']))
        done += 1

        for refnumber in (r for r in refnumbers if r):
            field = 'reportnumber'
            refnumber = standardize_report_number(refnumber)
            # Search for "hep-th/5644654 or such" in existing records
            recids = get_recids_matching_query(p=refnumber, f=field)
            write_message("These match searching %s in %s: %s" % \
                                   (refnumber, field, list(recids)), verbose=9)

            if not recids:
                insert_into_missing(thisrecid, refnumber)
            else:
                remove_from_missing(refnumber)

            if len(recids) > 1:
                store_citation_warning('multiple-matches', refnumber)
                msg = "Whoops: record '%d' report number value '%s' " \
                      "matches many records; taking only the first one. %s" % \
                      (thisrecid, refnumber, repr(recids))
                write_message(msg, stream=sys.stderr)

            for recid in list(recids)[:1]:  # take only the first one
                add_to_dicts(thisrecid, recid)

    mesg = "done fully"
    write_message(mesg)
    task_update_progress(mesg)

    t2 = os.times()[4]

    # Try to find references based on 999C5s
    # e.g. Phys.Rev.Lett. 53 (1986) 2285
    write_message("Phase 2: Journal references")
    done = 0
    for thisrecid, refs in references_info['journals'].iteritems():
        step("Journal references", thisrecid, done,
                                              len(references_info['journals']))
        done += 1

        for reference in (r for r in refs if r):
            p = reference
            field = 'journal'

            # check reference value to see whether it is well formed:
            if not re_CFG_JOURNAL_PUBINFO_STANDARD_FORM_REGEXP_CHECK.match(p):
                store_citation_warning('not-well-formed', p)
                msg = "Whoops, record '%d' reference value '%s' " \
                      "is not well formed; skipping it." % (thisrecid, p)
                write_message(msg, stream=sys.stderr)
                continue  # skip this ill-formed value

            recids = search_unit(p, field) - INTBITSET_OF_DELETED_RECORDS
            write_message("These match searching %s in %s: %s" \
                                 % (reference, field, list(recids)), verbose=9)

            if not recids:
                insert_into_missing(thisrecid, p)
            else:
                remove_from_missing(p)

            if len(recids) > 1:
                store_citation_warning('multiple-matches', p)
                msg = "Whoops: record '%d' reference value '%s' " \
                      "matches many records; taking only the first one. %s" % \
                      (thisrecid, p, repr(recids))
                write_message(msg, stream=sys.stderr)

            for recid in list(recids)[:1]:  # take only the first one
                add_to_dicts(thisrecid, recid)

    mesg = "done fully"
    write_message(mesg)
    task_update_progress(mesg)

    t3 = os.times()[4]

    # Try to find references based on 999C5a
    # e.g. 10.1007/BF03170733
    write_message("Phase 3: DOI references")
    done = 0
    for thisrecid, refs in references_info['doi'].iteritems():
        step("DOI references", thisrecid, done, len(references_info['doi']))
        done += 1

        for reference in (r for r in refs if r):
            p = reference
            field = 'doi'

            recids = get_recids_matching_query(p, field)
            write_message("These match searching %s in %s: %s" \
                                 % (reference, field, list(recids)), verbose=9)

            if not recids:
                insert_into_missing(thisrecid, p)
            else:
                remove_from_missing(p)

            if len(recids) > 1:
                store_citation_warning('multiple-matches', p)
                msg = "Whoops: record '%d' DOI value '%s' " \
                      "matches many records; taking only the first one. %s" % \
                      (thisrecid, p, repr(recids))
                write_message(msg, stream=sys.stderr)

            for recid in list(recids)[:1]:  # take only the first one
                add_to_dicts(thisrecid, recid)

    mesg = "done fully"
    write_message(mesg)
    task_update_progress(mesg)

    t4 = os.times()[4]

    # Search for stuff like CERN-TH-4859/87 in list of refs
    write_message("Phase 4: report numbers catchup")
    done = 0
    for thisrecid, reportcodes in records_info['report-numbers'].iteritems():
        step("Report numbers catchup", thisrecid, done,
                                           len(records_info['report-numbers']))
        done += 1

        for reportcode in (r for r in reportcodes if r):
            if reportcode.startswith('arXiv'):
                # regexp search ('r') so the optional category suffix
                # (e.g. " [hep-ph]") in the reference still matches
                std_reportcode = standardize_report_number(reportcode)
                report_pattern = r'^%s( *\[[a-zA-Z.-]*\])?' % \
                                                re.escape(std_reportcode)
                recids = get_recids_matching_query(report_pattern,
                                                   tags['refs_report_number'],
                                                   'r')
            else:
                # exact-phrase search ('e')
                recids = get_recids_matching_query(reportcode,
                                                   tags['refs_report_number'],
                                                   'e')
            for recid in recids:
                add_to_dicts(recid, thisrecid)

    mesg = "done fully"
    write_message(mesg)
    task_update_progress(mesg)

    # Find this record's pubinfo in other records' bibliography
    write_message("Phase 5: journals catchup")
    done = 0
    t5 = os.times()[4]
    for thisrecid, rec_journals in records_info['journals'].iteritems():
        step("Journals catchup", thisrecid, done,
                                                 len(records_info['journals']))
        done += 1

        for journal in rec_journals:
            journal = journal.replace("\"", "")
            # Search the publication string like
            # Phys. Lett., B 482 (2000) 417 in 999C5s
            recids = search_unit(p=journal, f=tags['refs_journal'], m='a') \
                                                - INTBITSET_OF_DELETED_RECORDS
            write_message("These records match %s in %s: %s" \
                    % (journal, tags['refs_journal'], list(recids)), verbose=9)

            for recid in recids:
                add_to_dicts(recid, thisrecid)

    mesg = "done fully"
    write_message(mesg)
    task_update_progress(mesg)

    write_message("Phase 6: DOI catchup")
    done = 0
    t6 = os.times()[4]
    for thisrecid, dois in records_info['doi'].iteritems():
        step("DOI catchup", thisrecid, done, len(records_info['doi']))
        done += 1

        for doi in dois:
            # Search the publication string like
            # Phys. Lett., B 482 (2000) 417 in 999C5a
            recids = search_unit(p=doi, f=tags['refs_doi'], m='a') \
                                                - INTBITSET_OF_DELETED_RECORDS
            write_message("These records match %s in %s: %s" \
                        % (doi, tags['refs_doi'], list(recids)), verbose=9)

            for recid in recids:
                add_to_dicts(recid, thisrecid)

    mesg = "done fully"
    write_message(mesg)
    task_update_progress(mesg)

    write_message("Phase 7: remove empty lists from dicts")

    # Remove empty lists in citation and reference
    keys = citations.keys()
    for k in keys:
        if not citations[k]:
            del citations[k]

    keys = references.keys()
    for k in keys:
        if not references[k]:
            del references[k]

    if task_get_task_param('verbose') >= 3:
        # Print only X first to prevent flood
        write_message("citation_list (x is cited by y):")
        write_message(dict(islice(citations.iteritems(), 10)))
        write_message("size: %s" % len(citations))
        write_message("reference_list (x cites y):")
        write_message(dict(islice(references.iteritems(), 10)))
        write_message("size: %s" % len(references))
        write_message("selfcitedbydic (x is cited by y and one of the " \
                      "authors of x same as y's):")
        write_message(dict(islice(selfcites.iteritems(), 10)))
        write_message("size: %s" % len(selfcites))
        write_message("selfdic (x cites y and one of the authors of x " \
                      "same as y's):")
        write_message(dict(islice(selfrefs.iteritems(), 10)))
        write_message("size: %s" % len(selfrefs))
        write_message("authorcitdic (author is cited in recs):")
        write_message(dict(islice(authorcites.iteritems(), 10)))
        write_message("size: %s" % len(authorcites))

    t7 = os.times()[4]

    write_message("Execution time for analyzing the citation information " \
                  "generating the dictionary:")
    write_message("... checking ref report numbers: %.2f sec" % (t2-t1))
    write_message("... checking ref journals: %.2f sec" % (t3-t2))
    write_message("... checking ref DOI: %.2f sec" % (t4-t3))
    write_message("... checking rec report numbers: %.2f sec" % (t5-t4))
    write_message("... checking rec journals: %.2f sec" % (t6-t5))
    write_message("... checking rec DOI: %.2f sec" % (t7-t6))
    write_message("... total time of ref_analyze: %.2f sec" % (t7-t1))

    return citations_weight, citations, references, selfcites, \
           selfrefs, authorcites
def store_dicts(dicts):
    """Insert the reference and citation list into the database"""
    for dict_key, db_name in (('refs', "reversedict"),
                              ('cites', "citationdict"),
                              ('selfcites', "selfcitedbydict"),
                              ('selfrefs', "selfcitdict")):
        insert_into_cit_db(dicts[dict_key], db_name)
def insert_into_cit_db(dic, name):
    """Store citation dictionary *dic* under *name* in the database."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    serialized = serialize_via_marshal(dic)
    write_message("size of %s %s" % (name, len(serialized)))
    # REPLACE overwrites any previous dictionary stored under this name
    run_sql("""REPLACE INTO rnkCITATIONDATA(object_name, object_value,
               last_updated) VALUES (%s, %s, %s)""",
            (name, serialized, timestamp))
def get_cit_dict(name):
    """Fetch a named citation dict from the db (empty dict if absent)."""
    rows = run_sql("""SELECT object_value FROM rnkCITATIONDATA
                      WHERE object_name = %s""", (name, ))
    if rows and rows[0] and rows[0][0]:
        return deserialize_via_marshal(rows[0][0])
    return {}
def get_initial_author_dict():
    """Read the author -> cited-in list dict from the db.

    Returns an empty dict (after registering the exception for the
    admins) if rnkAUTHORDATA cannot be read or deserialized.
    """
    adict = {}
    try:
        ah = run_sql("SELECT aterm,hitlist FROM rnkAUTHORDATA")
        for (a, h) in ah:
            adict[a] = deserialize_via_marshal(h)
        return adict
    except Exception:
        # was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit; keep the best-effort fallback but narrow it
        register_exception(prefix="could not read rnkAUTHORDATA",
                           alert_admin=True)
        return {}
def insert_into_missing(recid, report):
    """Mark *report* (cited by record *recid*) as missing from the library.

    The pair goes into rnkCITATIONDATAEXT unless it is already present.
    Overlong reports are silently dropped.
    """
    if len(report) >= 255:
        # Invalid report: too long for the database column
        # (currently varchar 255)
        return
    already_there = run_sql("""SELECT id_bibrec
                               FROM rnkCITATIONDATAEXT
                               WHERE id_bibrec = %s
                               AND extcitepubinfo = %s""",
                            (recid, report))
    if not already_there:
        run_sql("""INSERT INTO rnkCITATIONDATAEXT(id_bibrec, extcitepubinfo)
                   VALUES (%s,%s)""", (recid, report))
def remove_from_missing(report):
    """Drop all "missing reference" entries for *report*.

    Called when the referenced publication has made it into our
    collection, so it is no longer missing.
    """
    run_sql("""DELETE FROM rnkCITATIONDATAEXT
               WHERE extcitepubinfo = %s""", (report,))
def create_analysis_tables():
    """Create the temporary tmpcit analysis table and its indexes."""
    statements = (
        "CREATE TABLE IF NOT EXISTS tmpcit (citer mediumint(10), "
        "cited mediumint(10)) TYPE=MyISAM",
        "CREATE UNIQUE INDEX citercited ON tmpcit(citer, cited)",
        "CREATE INDEX citer ON tmpcit(citer)",
        "CREATE INDEX cited ON tmpcit(cited)",
    )
    for statement in statements:
        run_sql(statement)
def write_citer_cited(citer, cited):
    """Record one citer -> cited pair in the temporary tmpcit table."""
    run_sql("INSERT INTO tmpcit(citer, cited) VALUES (%s,%s)",
            (citer, cited))
def print_missing(num):
"""
Print the contents of rnkCITATIONDATAEXT table containing external
records that were cited by NUM or more internal records.
NUM is by default taken from the -E command line option.
"""
if not num:
num = task_get_option("print-extcites")
write_message("Listing external papers cited by %i or more \
internal records:" % num)
res = run_sql("""SELECT COUNT(id_bibrec), extcitepubinfo
FROM rnkCITATIONDATAEXT
GROUP BY extcitepubinfo HAVING COUNT(id_bibrec) >= %s
ORDER BY COUNT(id_bibrec) DESC""", (num,))
for (cnt, brec) in res:
print str(cnt)+"\t"+brec
write_message("Listing done.")
def tagify(parsedtag):
    """Build a MARC tag string, e.g. '100__a' from ['100', '', '', 'a'].

    Empty components are rendered as '_'.
    """
    return ''.join(part if part != '' else '_' for part in parsedtag)
def store_citation_warning(warning_type, cit_info):
    """Store a citation warning of *warning_type* for *cit_info*, once.

    Duplicate (type, citinfo) pairs are skipped so the warning table
    does not grow on every reindexing run.
    """
    existing = run_sql("""SELECT 1 FROM rnkCITATIONDATAERR
                          WHERE type = %s
                          AND citinfo = %s""", (warning_type, cit_info))
    if not existing:
        run_sql("""INSERT INTO rnkCITATIONDATAERR (type, citinfo)
                   VALUES (%s, %s)""", (warning_type, cit_info))
diff --git a/invenio/legacy/bibrank/cli.py b/invenio/legacy/bibrank/cli.py
index 39cdc1e23..c85f1e698 100644
--- a/invenio/legacy/bibrank/cli.py
+++ b/invenio/legacy/bibrank/cli.py
@@ -1,295 +1,295 @@
## -*- mode: python; coding: utf-8; -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibRank ranking daemon.
Usage: bibrank [options]
Ranking examples:
bibrank -wjif -a --id=0-30000,30001-860000 --verbose=9
bibrank -wjif -d --modified='2002-10-27 13:57:26'
bibrank -wwrd --rebalance --collection=Articles
bibrank -wwrd -a -i 234-250,293,300-500 -u admin
Ranking options:
-w, --run=r1[,r2] runs each rank method in the order given
-c, --collection=c1[,c2] select according to collection
-i, --id=low[-high] select according to doc recID
-m, --modified=from[,to] select according to modification date
-l, --lastupdate select according to last update
-a, --add add or update words for selected records
-d, --del delete words for selected records
-S, --stat show statistics for a method
-R, --recalculate recalculate weight data, used by word frequency
and citation methods, should be used if ca 1%
of the document has been changed since last
time -R was used
-E, --extcites=NUM print the top entries of the external cites table.
These are entries that should be entered in
your collection, since they have been cited
by NUM or more other records present in the
system. Useful for cataloguers to input
external papers manually.
Repairing options:
-k, --check check consistency for all records in the table(s)
check if update of ranking data is necessary
-r, --repair try to repair all records in the table(s)
Scheduling options:
-u, --user=USER user name to store task, password needed
-s, --sleeptime=SLEEP time after which to repeat tasks (no)
e.g.: 1s, 30m, 24h, 7d
-t, --time=TIME moment for the task to be active (now)
e.g.: +15s, 5m, 3h , 2002-10-27 13:57:26
General options:
-h, --help print this help and exit
-V, --version print version and exit
-v, --verbose=LEVEL verbose level (from 0 to 9, default 1)
"""
__revision__ = "$Id$"
import sys
import traceback
import ConfigParser
from invenio.config import CFG_ETCDIR
from invenio.dbquery import run_sql
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.bibtask import task_init, write_message, task_get_option, \
task_set_option, get_datetime, task_update_status, \
task_sleep_now_if_required
# pylint: disable=W0611
# Disabling unused import pylint check, since these are needed to get
# imported here, and are called later dynamically.
from invenio.bibrank_tag_based_indexer import \
single_tag_rank_method, \
citation, \
download_weight_filtering_user, \
download_weight_total, \
file_similarity_by_times_downloaded, \
index_term_count
from invenio.bibrank_word_indexer import word_similarity #@UnusedImport
from invenio.bibrank_citerank_indexer import citerank #@UnusedImport
from invenio.solrutils_bibrank_indexer import word_similarity_solr #@UnusedImport
from invenio.xapianutils_bibrank_indexer import word_similarity_xapian #@UnusedImport
from invenio.bibrank_selfcites_task import process_updates as selfcites
# pylint: enable=W0611
nb_char_in_line = 50 # for verbose pretty printing
chunksize = 1000 # default size of chunks that the records will be treated by
base_process_size = 4500 # process base size
def split_ranges(parse_string):
    """Parse "low[-high][,low[-high]...]" into a list of [low, high] pairs.

    A bare number N becomes [N, N]; reversed ranges are normalized so
    that low <= high.
    """
    recIDs = []
    for rang in parse_string.split(","):
        bounds = rang.split("-")
        if len(bounds) == 1:
            value = int(bounds[0])
            recIDs.append([value, value])
        else:
            low, high = int(bounds[0]), int(bounds[1])
            if low > high:  # sanity check
                low, high = high, low
            recIDs.append([low, high])
    return recIDs
def get_date_range(var):
    """Return the (low, high) dates contained in *var* ("from[,to]").

    high is None when only one date is given; with more than two
    components the function falls through and returns None (unchanged
    legacy behaviour).
    """
    limits = var.split(",")
    if len(limits) == 1:
        return get_datetime(limits[0]), None
    if len(limits) == 2:
        return get_datetime(limits[0]), get_datetime(limits[1])
def task_run_core():
    """Run the bibrank indexing task over every requested rank method.

    When no -w/--run methods were given, all methods registered in
    rnkMETHOD are run.  For each method the ``function`` option of its
    .cfg file names the module-level callable to invoke.
    Returns True on success; exits the process with status 1 (after
    marking the task as ERROR) on failure.
    """
    if not task_get_option("run"):
        # default: run every registered rank method
        task_set_option("run", [name[0] for name in run_sql("SELECT name from rnkMETHOD")])
    try:
        for key in task_get_option("run"):
            task_sleep_now_if_required(can_stop_too=True)
            write_message("")
            filename = CFG_ETCDIR + "/bibrank/" + key + ".cfg"
            write_message("Getting configuration from file: %s" % filename,
                verbose=9)
            config = ConfigParser.ConfigParser()
            try:
                config.readfp(open(filename))
            except StandardError, e:
                # NOTE(review): `e` is unused and the bare
                # `raise StandardError` below discards the original
                # error message; the outer handler will report an
                # empty exception.
                write_message("Cannot find configurationfile: %s. "
                    "The rankmethod may also not be registered using "
                    "the BibRank Admin Interface." % filename, sys.stderr)
                raise StandardError
            #Using the function variable to call the function related to the
            #rank method
            cfg_function = config.get("rank_method", "function")
            # the callables are imported at module level (see the
            # pylint-guarded import block at the top of the file)
            func_object = globals().get(cfg_function)
            if func_object:
                func_object(key)
            else:
                write_message("Cannot run method '%s', no function to call"
                    % key)
    except StandardError, e:
        # any failure aborts the whole task run
        write_message("\nException caught: %s" % e, sys.stderr)
        write_message(traceback.format_exc()[:-1])
        register_exception()
        task_update_status("ERROR")
        sys.exit(1)
    return True
def main():
    """Main that construct all the bibtask."""
    # All CLI behaviour is delegated to the generic bibtask machinery:
    # option parsing is done by task_submit_elaborate_specific_parameter
    # and the actual work by task_run_core.
    task_init(authorization_action='runbibrank',
              authorization_msg="BibRank Task Submission",
              description="""Ranking examples:
       bibrank -wjif -a --id=0-30000,30001-860000 --verbose=9
       bibrank -wjif -d --modified='2002-10-27 13:57:26'
       bibrank -wjif --rebalance --collection=Articles
       bibrank -wsbr -a -i 234-250,293,300-500 -u admin
       bibrank -u admin -w citation -E 10
       bibrank -u admin -w citation -A
""",
              help_specific_usage="""Ranking options:
 -w, --run=r1[,r2]         runs each rank method in the order given
 -c, --collection=c1[,c2]  select according to collection
 -i, --id=low[-high]       select according to doc recID
 -m, --modified=from[,to]  select according to modification date
 -l, --lastupdate          select according to last update
 -a, --add                 add or update words for selected records
 -d, --del                 delete words for selected records
 -S, --stat                show statistics for a method
 -R, --recalculate         recalculate weight data, used by word frequency
                           and citation methods, should be used if ca 1%
                           of the documents have been changed since last
                           time -R was used.  NOTE: This will replace the
                           entire set of weights, regardless of date/id
                           selection.
 -E, --extcites=NUM        print the top entries of the external cites table.
                           These are entries that should be entered in
                           your collection, since they have been cited
                           by NUM or more other records present in the
                           system.  Useful for cataloguers to input
                           external papers manually.
 -A --author-citations     Calculate author citations.

Repairing options:
 -k,  --check              check consistency for all records in the table(s)
                           check if update of ranking data is necessary
 -r, --repair              try to repair all records in the table(s)
""",
              version=__revision__,
              specific_params=("AE:ladSi:m:c:kUrRM:f:w:", [
                  "author-citations",
                  "print-extcites=",
                  "lastupdate",
                  "add",
                  "del",
                  "repair",
                  "maxmem",
                  "flush",
                  "stat",
                  "rebalance",
                  "id=",
                  "collection=",
                  "check",
                  "modified=",
                  "update",
                  "run="]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def task_submit_elaborate_specific_parameter(key, value, opts, dummy):
"""Elaborate a specific parameter of CLI bibrank."""
if key in ("-a", "--add"):
task_set_option("cmd", "add")
if ("-x","") in opts or ("--del","") in opts:
raise StandardError, "--add incompatible with --del"
elif key in ("--run", "-w"):
task_set_option("run", [])
run = value.split(",")
for run_key in range(0, len(run)):
task_get_option('run').append(run[run_key])
elif key in ("-r", "--repair"):
task_set_option("cmd", "repair")
elif key in ("-E", "--print-extcites"):
try:
task_set_option("print-extcites", int(value))
except:
task_set_option("print-extcites", 10) # default fallback value
task_set_option("cmd", "print-missing")
elif key in ("-A", "--author-citations"):
task_set_option("author-citations", "1")
elif key in ("-d", "--del"):
task_set_option("cmd", "del")
elif key in ("-k", "--check"):
task_set_option("cmd", "check")
elif key in ("-S", "--stat"):
task_set_option("cmd", "stat")
elif key in ("-i", "--id"):
task_set_option("id", task_get_option("id") + split_ranges(value))
task_set_option("last_updated", "")
elif key in ("-c", "--collection"):
task_set_option("collection", value)
elif key in ("-R", "--rebalance"):
task_set_option("quick", "no")
elif key in ("-f", "--flush"):
task_set_option("flush", int(value))
elif key in ("-M", "--maxmem"):
task_set_option("maxmem", int(value))
if task_get_option("maxmem") < base_process_size + 1000:
raise StandardError, "Memory usage should be higher than %d kB" % \
(base_process_size + 1000)
elif key in ("-m", "--modified"):
task_set_option("modified", get_date_range(value))#2002-10-27 13:57:26)
task_set_option("last_updated", "")
elif key in ("-l", "--lastupdate"):
task_set_option("last_updated", "last_updated")
else:
return False
return True
# Standard CLI entry point: construct and launch the bibrank task.
if __name__ == "__main__":
    main()
diff --git a/invenio/legacy/bibrank/record_sorter.py b/invenio/legacy/bibrank/record_sorter.py
index b3a2b3f19..7e927122c 100644
--- a/invenio/legacy/bibrank/record_sorter.py
+++ b/invenio/legacy/bibrank/record_sorter.py
@@ -1,442 +1,442 @@
# -*- coding: utf-8 -*-
## Ranking of records using different parameters and methods on the fly.
##
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
import string
import time
import math
import re
import ConfigParser
import copy
from invenio.config import \
CFG_SITE_LANG, \
CFG_ETCDIR, \
CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
from invenio.dbquery import run_sql, deserialize_via_marshal, wash_table_column_name
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webpage import adderrorbox
from invenio.bibindex_engine_stemmer import stem
from invenio.bibindex_engine_stopwords import is_stopword
from invenio.bibrank_citation_searcher import get_cited_by, get_cited_by_weight
from invenio.intbitset import intbitset
from invenio.bibrank_word_searcher import find_similar
# Do not remove these lines, it is necessary for func_object = globals().get(function)
from invenio.bibrank_word_searcher import word_similarity
from invenio.solrutils_bibrank_searcher import word_similarity_solr
from invenio.xapianutils_bibrank_searcher import word_similarity_xapian
def compare_on_val(first, second):
    # Python 2 cmp-style comparator: orders (recID, value) tuples by their
    # second member in descending order.
    return cmp(second[1], first[1])
def check_term(term, col_size, term_rec, max_occ, min_occ, termlength):
    """Check if the term is valid for use.
    term - the term to check
    col_size - the number of records in database
    term_rec - the number of records which contains this term
    max_occ - max frequency of the term allowed
    min_occ - min frequence of the term allowed
    termlength - the minimum length of the terms allowed

    Returns "true" when the term is usable, "" otherwise."""
    try:
        # Reject stopwords, too-short terms, and terms whose document
        # frequency falls outside the (min_occ, max_occ) band.
        if is_stopword(term) or (len(term) <= termlength) or ((float(term_rec) / float(col_size)) >= max_occ) or ((float(term_rec) / float(col_size)) <= min_occ):
            return ""
        # Purely numeric terms are rejected; int() raising ValueError for
        # non-numeric terms is the expected (silenced) path below.
        # NOTE(review): the term "0" converts to a falsy int and slips
        # through this check — confirm whether that is intended.
        if int(term):
            return ""
    except StandardError, e:
        pass
    return "true"
def create_external_ranking_settings(rank_method_code, config):
    """Populate methods[rank_method_code] with the external-engine
    (Solr/Xapian) options found in the rank method's config file:
    per-field sections ([fieldN]), the [find_similar_to_recid] section,
    and the flat [field_settings] options."""
    entry = methods[rank_method_code]
    entry['fields'] = dict()
    field_pattern = re.compile('field[0-9]+')
    for section in config.sections():
        if field_pattern.search(section):
            # A [fieldN] section: its 'name' option names the field, every
            # other option becomes a setting of that field.
            field_name = config.get(section, 'name')
            entry['fields'][field_name] = dict()
            for option in config.options(section):
                if option != 'name':
                    create_external_ranking_option(section, option, entry['fields'][field_name], config)
        elif section == 'find_similar_to_recid':
            entry[section] = dict()
            for option in config.options(section):
                create_external_ranking_option(section, option, entry[section], config)
        elif section == 'field_settings':
            # Flat settings go straight onto the method entry itself.
            for option in config.options(section):
                create_external_ranking_option(section, option, entry, config)
def create_external_ranking_option(section, option, dictionary, config):
    """Copy one config option into *dictionary*, coercing values made of
    digits only to int and leaving everything else as a string."""
    raw = config.get(section, option)
    dictionary[option] = int(raw) if raw.isdigit() else raw
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    Populates the module-level `methods` dict, keyed by rank method code,
    from each method's etc/bibrank/<code>.cfg file and the rnkMETHOD*
    database tables. Also resets the module-level `voutput` buffer.
    """
    global methods
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    methods = {}
    global voutput
    voutput = ""
    for (rank_method_code,) in bibrank_meths:
        try:
            # NOTE: `file` shadows the builtin of the same name.
            file = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
            config = ConfigParser.ConfigParser()
            config.readfp(open(file))
        except StandardError, e:
            # Config read failures are silently ignored; `config` then
            # keeps the previous iteration's content (pre-existing quirk).
            pass
        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            methods[rank_method_code] = {}
            methods[rank_method_code]["function"] = cfg_function
            methods[rank_method_code]["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
            methods[rank_method_code]["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
            methods[rank_method_code]["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception("Error in configuration file: %s" % (CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"))
        # Localized display names of the method, one per language.
        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            methods[rank_method_code][ln] = value
        if config.has_option(cfg_function, "table"):
            methods[rank_method_code]["rnkWORD_table"] = config.get(cfg_function, "table")
            # col_size = number of rows in the companion ...R table.
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(methods[rank_method_code]["rnkWORD_table"][:-1])
            methods[rank_method_code]["col_size"] = run_sql(query)[0][0]
        if config.has_option(cfg_function, "stemming") and config.get(cfg_function, "stemming"):
            try:
                methods[rank_method_code]["stemmer"] = config.get(cfg_function, "stemming")
            except Exception,e:
                pass
        if config.has_option(cfg_function, "stopword"):
            methods[rank_method_code]["stopwords"] = config.get(cfg_function, "stopword")
        # Optional tuning parameters for the "find similar records" feature.
        if config.has_section("find_similar"):
            methods[rank_method_code]["max_word_occurence"] = float(config.get("find_similar", "max_word_occurence"))
            methods[rank_method_code]["min_word_occurence"] = float(config.get("find_similar", "min_word_occurence"))
            methods[rank_method_code]["min_word_length"] = int(config.get("find_similar", "min_word_length"))
            methods[rank_method_code]["min_nr_words_docs"] = int(config.get("find_similar", "min_nr_words_docs"))
            methods[rank_method_code]["max_nr_words_upper"] = int(config.get("find_similar", "max_nr_words_upper"))
            methods[rank_method_code]["max_nr_words_lower"] = int(config.get("find_similar", "max_nr_words_lower"))
            methods[rank_method_code]["default_min_relevance"] = int(config.get("find_similar", "default_min_relevance"))
        # External search engines get their extra per-field settings.
        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)
        # A method may be a weighted combination of other methods.
        if config.has_section("combine_method"):
            i = 1
            methods[rank_method_code]["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                methods[rank_method_code]["combine_method"].append(string.split(config.get("combine_method", "method%s" % i), ","))
                i += 1
def is_method_valid(colID, rank_method_code):
    """
    Check if RANK_METHOD_CODE method is valid for the collection given.
    If colID is None, then check for existence regardless of collection.
    """
    if colID is None:
        return run_sql("SELECT COUNT(*) FROM rnkMETHOD WHERE name=%s", (rank_method_code,))[0][0]
    # Map of collection id -> score for collections where this method is
    # explicitly enabled.
    enabled_colls = dict(run_sql("SELECT id_collection, score from collection_rnkMETHOD,rnkMETHOD WHERE id_rnkMETHOD=rnkMETHOD.id AND name=%s", (rank_method_code,)))
    try:
        colID = int(colID)
    except TypeError:
        return 0
    if enabled_colls.has_key(colID):
        return 1
    else:
        # Walk up the collection tree: a method enabled on an ancestor
        # collection is considered valid for its descendants. Note that
        # colID alternates between an int and a run_sql result tuple
        # inside this loop.
        while colID:
            colID = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s", (colID,))
            if colID and enabled_colls.has_key(colID[0][0]):
                return 1
            elif colID:
                colID = colID[0][0]
    return 0
def get_bibrank_methods(colID, ln=CFG_SITE_LANG):
    """
    Return a list of rank methods enabled for collection colID and the
    name of them in the language defined by the ln parameter.

    Each entry is a (rank_method_code, display_name) pair; the display
    name falls back to the site language and finally to the raw code.
    """
    if 'methods' not in globals():
        create_rnkmethod_cache()
    avail_methods = []
    for rank_method_code, options in methods.items():
        if "function" in options and is_method_valid(colID, rank_method_code):
            if ln in options:
                label = options[ln]
            elif CFG_SITE_LANG in options:
                label = options[CFG_SITE_LANG]
            else:
                label = rank_method_code
            avail_methods.append((rank_method_code, label))
    return avail_methods
def rank_records(rank_method_code, rank_limit_relevance, hitset_global, pattern=[], verbose=0, field='', rg=None, jrec=None):
    """rank_method_code, e.g. `jif' or `sbr' (word frequency vector model)
    rank_limit_relevance, e.g. `23' for `nbc' (number of citations) or `0.10' for `vec'
    hitset, search engine hits;
    pattern, search engine query or record ID (you check the type)
    verbose, verbose level
    output:
    list of records
    list of rank values
    prefix
    postfix
    verbose_output

    NOTE(review): `pattern=[]` is a mutable default argument; it is only
    read here, never mutated, so it is harmless — but fragile.
    """
    # Local voutput shadows the module-level one used by the helpers.
    voutput = ""
    configcreated = ""
    starttime = time.time()
    afterfind = starttime - time.time()
    aftermap = starttime - time.time()
    try:
        hitset = copy.deepcopy(hitset_global) #we are receiving a global hitset
        if not globals().has_key('methods'):
            create_rnkmethod_cache()
        function = methods[rank_method_code]["function"]
        #we get 'citation' method correctly here
        func_object = globals().get(function)
        if verbose > 0:
            voutput += "function: %s <br/> " % function
            voutput += "pattern: %s <br/>" % str(pattern)
        # Dispatch: "find similar to recid" query, citation ranking,
        # a known ranking function, or the generic predetermined-value
        # ranking as fallback.
        if func_object and pattern and pattern[0][0:6] == "recid:" and function == "word_similarity":
            result = find_similar(rank_method_code, pattern[0][6:], hitset, rank_limit_relevance, verbose, methods)
        elif rank_method_code == "citation":
            #we get rank_method_code correctly here. pattern[0] is the search word - not used by find_cit
            p = ""
            if pattern and pattern[0]:
                p = pattern[0][6:]
            result = find_citations(rank_method_code, p, hitset, verbose)
        elif func_object:
            if function == "word_similarity":
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose, methods)
            elif function in ("word_similarity_solr", "word_similarity_xapian"):
                # External engines only need to rank up to the last
                # displayed record (rg records starting at jrec).
                if not rg:
                    rg = CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS
                if not jrec:
                    jrec = 0
                ranked_result_amount = rg + jrec
                if verbose > 0:
                    voutput += "Ranked result amount: %s<br/><br/>" % ranked_result_amount
                if verbose > 0:
                    voutput += "field: %s<br/>" % field
                if function == "word_similarity_solr":
                    if verbose > 0:
                        voutput += "In Solr part:<br/>"
                    result = word_similarity_solr(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
                if function == "word_similarity_xapian":
                    if verbose > 0:
                        voutput += "In Xapian part:<br/>"
                    result = word_similarity_xapian(pattern, hitset, methods[rank_method_code], verbose, field, ranked_result_amount)
            else:
                result = func_object(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
        else:
            result = rank_by_method(rank_method_code, pattern, hitset, rank_limit_relevance, verbose)
    except Exception, e:
        register_exception()
        result = (None, "", adderrorbox("An error occured when trying to rank the search result "+rank_method_code, ["Unexpected error: %s<br />" % (e,)]), voutput)
    afterfind = time.time() - starttime
    if result[0] and result[1]: #split into two lists for search_engine
        results_similar_recIDs = map(lambda x: x[0], result[0])
        results_similar_relevances = map(lambda x: x[1], result[0])
        result = (results_similar_recIDs, results_similar_relevances, result[1], result[2], "%s" % configcreated + result[3])
        aftermap = time.time() - starttime;
    else:
        result = (None, None, result[1], result[2], result[3])
    #add stuff from here into voutput from result
    tmp = voutput+result[4]
    if verbose > 0:
        tmp += "<br/>Elapsed time after finding: "+str(afterfind)+"\nElapsed after mapping: "+str(aftermap)
    result = (result[0],result[1],result[2],result[3],tmp)
    #dbg = string.join(map(str,methods[rank_method_code].items()))
    #result = (None, "", adderrorbox("Debug ",rank_method_code+" "+dbg),"",voutput);
    return result
def combine_method(rank_method_code, pattern, hitset, rank_limit_relevance,verbose):
    """combining several methods into one based on methods/percentage in config file

    Returns (sorted (recID, score) list, prefix, postfix, voutput); on any
    failure falls back to a (None, warning, "", voutput) tuple.
    """
    global voutput
    result = {}
    try:
        for (method, percent) in methods[rank_method_code]["combine_method"]:
            function = methods[method]["function"]
            func_object = globals().get(function)
            percent = int(percent)
            if func_object:
                this_result = func_object(method, pattern, hitset, rank_limit_relevance, verbose)[0]
            else:
                this_result = rank_by_method(method, pattern, hitset, rank_limit_relevance, verbose)[0]
            # The contribution of each sub-method is based on the record's
            # rank *position* (index i), scaled by the configured weight.
            for i in range(0, len(this_result)):
                (recID, value) = this_result[i]
                if value > 0:
                    result[recID] = result.get(recID, 0) + int((float(i) / len(this_result)) * float(percent))
        result = result.items()
        result.sort(lambda x, y: cmp(x[1], y[1]))  # ascending by combined score
        return (result, "(", ")", voutput)
    except Exception, e:
        # Broad catch: any sub-method failure disables the combined method.
        return (None, "Warning: %s method cannot be used for ranking your query." % rank_method_code, "", voutput)
def rank_by_method(rank_method_code, lwords, hitset, rank_limit_relevance,verbose):
    """Ranking of records based on predetermined values.
    input:
    rank_method_code - the code of the method, from the name field in rnkMETHOD, used to get predetermined values from
    rnkMETHODDATA
    lwords - a list of words from the query
    hitset - a list of hits for the query found by search_engine
    rank_limit_relevance - show only records with a rank value above this
    verbose - verbose value
    output:
    reclist - a list of sorted records, with unsorted added to the end: [[23,34], [344,24], [1,01]]
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value"""
    global voutput
    voutput = ""
    rnkdict = run_sql("SELECT relevance_data FROM rnkMETHODDATA,rnkMETHOD where rnkMETHOD.id=id_rnkMETHOD and rnkMETHOD.name=%s", (rank_method_code,))
    if not rnkdict:
        return (None, "Warning: Could not load ranking data for method %s." % rank_method_code, "", voutput)
    max_recid = 0
    res = run_sql("SELECT max(id) FROM bibrec")
    if res and res[0][0]:
        max_recid = int(res[0][0])
    # Restrict ranking to any recid:/recid ranges present in the query.
    lwords_hitset = None
    for j in range(0, len(lwords)): #find which docs to search based on ranges..should be done in search_engine...
        if lwords[j] and lwords[j][:6] == "recid:":
            if not lwords_hitset:
                lwords_hitset = intbitset()
            lword = lwords[j][6:]
            # "a->b" denotes a recid range; a bare value a single recid.
            if string.find(lword, "->") > -1:
                lword = string.split(lword, "->")
                if int(lword[0]) >= max_recid or int(lword[1]) >= max_recid + 1:
                    return (None, "Warning: Given record IDs are out of range.", "", voutput)
                for i in range(int(lword[0]), int(lword[1])):
                    lwords_hitset.add(int(i))
            elif lword < max_recid + 1:
                # NOTE(review): lword is still a *string* here, so this is
                # a py2 string-vs-int comparison, not a numeric bound check
                # — confirm intent.
                lwords_hitset.add(int(lword))
            else:
                return (None, "Warning: Given record IDs are out of range.", "", voutput)
    rnkdict = deserialize_via_marshal(rnkdict[0][0])
    if verbose > 0:
        voutput += "<br />Running rank method: %s, using rank_by_method function in bibrank_record_sorter<br />" % rank_method_code
        voutput += "Ranking data loaded, size of structure: %s<br />" % len(rnkdict)
    lrecIDs = list(hitset)
    if verbose > 0:
        voutput += "Number of records to rank: %s<br />" % len(lrecIDs)
    reclist = []
    reclist_addend = []
    # Records without a precomputed weight go to reclist_addend with
    # weight 0 and are prepended (shown last after the ascending sort).
    if not lwords_hitset: #rank all docs, can this be speed up using something else than for loop?
        for recID in lrecIDs:
            if rnkdict.has_key(recID):
                reclist.append((recID, rnkdict[recID]))
                del rnkdict[recID]
            else:
                reclist_addend.append((recID, 0))
    else: #rank docs in hitset, can this be speed up using something else than for loop?
        for recID in lwords_hitset:
            if rnkdict.has_key(recID) and recID in hitset:
                reclist.append((recID, rnkdict[recID]))
                del rnkdict[recID]
            elif recID in hitset:
                reclist_addend.append((recID, 0))
    if verbose > 0:
        voutput += "Number of records ranked: %s<br />" % len(reclist)
        voutput += "Number of records not ranked: %s<br />" % len(reclist_addend)
    reclist.sort(lambda x, y: cmp(x[1], y[1]))  # ascending by weight
    return (reclist_addend + reclist, methods[rank_method_code]["prefix"], methods[rank_method_code]["postfix"], voutput)
def find_citations(rank_method_code, recID, hitset, verbose):
    """Rank by the amount of citations.

    If recID parses as an int, rank the records citing that record;
    otherwise rank the whole hitset by citation weight.
    Returns ((recordid, weight) list, prefix, postfix, message).
    """
    #calculate the cited-by values for all the members of the hitset
    #returns: ((recordid,weight),prefix,postfix,message)
    global voutput
    voutput = ""
    #If the recID is numeric, return only stuff that cites it. Otherwise return
    #stuff that cites hitset
    #try to convert to int
    recisint = True
    recidint = 0
    try:
        recidint = int(recID)
    except:
        recisint = False
    ret = []
    if recisint:
        myrecords = get_cited_by(recidint) #this is a simple list
        ret = get_cited_by_weight(myrecords)
    else:
        ret = get_cited_by_weight(hitset)
    ret.sort(lambda x,y:cmp(x[1],y[1])) #ascending by the second member of the tuples
    if verbose > 0:
        voutput = voutput+"\nrecID "+str(recID)+" is int: "+str(recisint)+" hitset "+str(hitset)+"\n"+"find_citations retlist "+str(ret)
    #voutput = voutput + str(ret)
    if ret:
        return (ret,"(", ")", "")
    else:
        return ((),"", "", "")
diff --git a/invenio/legacy/bibrank/tag_based_indexer.py b/invenio/legacy/bibrank/tag_based_indexer.py
index 1ffea9f52..210bb8b2a 100644
--- a/invenio/legacy/bibrank/tag_based_indexer.py
+++ b/invenio/legacy/bibrank/tag_based_indexer.py
@@ -1,533 +1,533 @@
# -*- coding: utf-8 -*-
## Ranking of records using different parameters and methods.
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import os
import sys
import time
import traceback
import ConfigParser
from invenio.config import \
CFG_SITE_LANG, \
CFG_ETCDIR, \
CFG_PREFIX
from invenio.search_engine import perform_request_search
from invenio.bibrank_citation_indexer import get_citation_weight, print_missing, get_cit_dict, insert_into_cit_db
from invenio.bibrank_downloads_indexer import *
from invenio.dbquery import run_sql, serialize_via_marshal, deserialize_via_marshal, \
wash_table_column_name, get_table_update_time
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.bibtask import task_get_option, write_message, task_sleep_now_if_required
from invenio.bibindex_engine import create_range_list
from invenio.intbitset import intbitset
options = {}
def remove_auto_cites(dic):
    """Remove auto-cites and dedupe.

    *dic* maps recID -> list of citing/cited recIDs; each list is deduped
    and purged of the record's own id. Mutates and returns *dic*.
    """
    for key in dic.keys():
        # dict.fromkeys(...).keys() dedupes the list (Python 2: returns a
        # plain list; ordering is not preserved).
        new_list = dic.fromkeys(dic[key]).keys()
        try:
            new_list.remove(key)  # drop the self-citation if present
        except ValueError:
            pass
        dic[key] = new_list
    return dic
def citation_repair_exec():
    """Repair citation ranking method"""
    # Clean both the forward and the reverse citation dictionaries:
    # fetch, dedupe/strip self-citations, and store back.
    for dict_name in ("citationdict", "reversedict"):
        cit_dict = get_cit_dict(dict_name)
        write_message("Repairing %s" % dict_name)
        healthy = remove_auto_cites(cit_dict)
        insert_into_cit_db(healthy, dict_name)
    return
# The four rank methods below have no repair procedure; their *_repair_exec
# hooks only log that repairing is skipped.
def download_weight_filtering_user_repair_exec ():
    """Repair download weight filtering user ranking method"""
    write_message("Repairing for this ranking method is not defined. Skipping.")
    return
def download_weight_total_repair_exec():
    """Repair download weight total ranking method"""
    write_message("Repairing for this ranking method is not defined. Skipping.")
    return
def file_similarity_by_times_downloaded_repair_exec():
    """Repair file similarity by times downloaded ranking method"""
    write_message("Repairing for this ranking method is not defined. Skipping.")
    return
def single_tag_rank_method_repair_exec():
    """Repair single tag ranking method"""
    write_message("Repairing for this ranking method is not defined. Skipping.")
    return
def citation_exec(rank_method_code, name, config):
    """Rank method for citation analysis"""
    #first check if this is a specific task
    if task_get_option("cmd") == "print-missing":
        # -E/--print-extcites: just report top missing external cites.
        num = task_get_option("num")
        print_missing(num)
    else:
        dic, index_update_time = get_citation_weight(rank_method_code, config)
        if dic:
            if task_get_option("id") or task_get_option("collection") or \
                    task_get_option("modified"):
                # user have asked to citation-index specific records
                # only, so we should not update citation indexer's
                # last run time stamp information
                index_update_time = None
            intoDB(dic, index_update_time, rank_method_code)
        else:
            write_message("No need to update the indexes for citations.")
# Thin aliases: bibtask dispatches on the function name configured for each
# rank method; all three delegate to the generic bibrank_engine driver.
def download_weight_filtering_user(run):
    return bibrank_engine(run)
def download_weight_total(run):
    return bibrank_engine(run)
def file_similarity_by_times_downloaded(run):
    return bibrank_engine(run)
def download_weight_filtering_user_exec (rank_method_code, name, config):
    """Ranking by number of downloads per User.
    Only one full Text Download is taken in account for one
    specific userIP address

    Loads the existing weights, merges in downloads newer than the last
    run, stores the result and returns {"time": elapsed_seconds}."""
    begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    time1 = time.time()
    dic = fromDB(rank_method_code)
    last_updated = get_lastupdated(rank_method_code)
    keys = new_downloads_to_index(last_updated)
    filter_downloads_per_hour(keys, last_updated)
    dic = get_download_weight_filtering_user(dic, keys)
    intoDB(dic, begin_date, rank_method_code)
    time2 = time.time()
    return {"time":time2-time1}
def download_weight_total_exec(rank_method_code, name, config):
    """rankink by total number of downloads without check the user ip
    if users downloads 3 time the same full text document it has to be count as 3 downloads

    Same incremental update flow as download_weight_filtering_user_exec;
    returns {"time": elapsed_seconds}."""
    begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    time1 = time.time()
    dic = fromDB(rank_method_code)
    last_updated = get_lastupdated(rank_method_code)
    keys = new_downloads_to_index(last_updated)
    filter_downloads_per_hour(keys, last_updated)
    dic = get_download_weight_total(dic, keys)
    intoDB(dic, begin_date, rank_method_code)
    time2 = time.time()
    return {"time":time2-time1}
def file_similarity_by_times_downloaded_exec(rank_method_code, name, config):
    """update dictionnary {recid:[(recid, nb page similarity), ()..]}

    Same incremental update flow as the other download-based methods;
    returns {"time": elapsed_seconds}."""
    begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    time1 = time.time()
    dic = fromDB(rank_method_code)
    last_updated = get_lastupdated(rank_method_code)
    keys = new_downloads_to_index(last_updated)
    filter_downloads_per_hour(keys, last_updated)
    dic = get_file_similarity_by_times_downloaded(dic, keys)
    intoDB(dic, begin_date, rank_method_code)
    time2 = time.time()
    return {"time":time2-time1}
def single_tag_rank_method_exec(rank_method_code, name, config):
    """Creating the rank method data.

    Merges newly computed single-tag weights over the stored ones (new
    values win) and writes the union back with a fresh timestamp."""
    begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    rnkset = {}
    rnkset_old = fromDB(rank_method_code)
    rnkset_new = single_tag_rank(config)
    rnkset = union_dicts(rnkset_old, rnkset_new)
    intoDB(rnkset, begin_date, rank_method_code)
def single_tag_rank(config):
    """Connect the given tag with the data from the kb file given.

    Reads a knowledge-base file mapping tag values to weights, collects
    the configured tag's values for the records in options["recid_range"]
    (optionally requiring mandatory companion tags), and returns a
    {recID: weight} dict restricted to options["validset"].
    """
    write_message("Loading knowledgebase file", verbose=9)
    kb_data = {}
    records = []
    write_message("Reading knowledgebase file: %s" % \
        config.get(config.get("rank_method", "function"), "kb_src"))
    # NOTE: `input` shadows the builtin of the same name.
    input = open(config.get(config.get("rank_method", "function"), "kb_src"), 'r')
    data = input.readlines()
    for line in data:
        # KB file format: "<value> --- <weight>"; '#' starts a comment line.
        if not line[0:1] == "#":
            kb_data[string.strip((string.split(string.strip(line), "---"))[0])] = (string.split(string.strip(line), "---"))[1]
    write_message("Number of lines read from knowledgebase file: %s" % len(kb_data))
    tag = config.get(config.get("rank_method", "function"), "tag")
    tags = config.get(config.get("rank_method", "function"), "check_mandatory_tags").split(", ")
    if tags == ['']:
        tags = ""
    records = []
    for (recids, recide) in options["recid_range"]:
        task_sleep_now_if_required(can_stop_too=True)
        write_message("......Processing records #%s-%s" % (recids, recide))
        recs = run_sql("SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (tag, recids, recide))
        # valid = all records that carry every mandatory tag.
        valid = intbitset(trailing_bits=1)
        valid.discard(0)
        for key in tags:
            newset = intbitset()
            newset += [recid[0] for recid in (run_sql("SELECT id_bibrec FROM bib%sx, bibrec_bib%sx WHERE id_bibxxx=id AND tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (key, recids, recide)))]
            valid.intersection_update(newset)
        if tags:
            recs = filter(lambda x: x[0] in valid, recs)
        records = records + list(recs)
    write_message("Number of records found with the necessary tags: %s" % len(records))
    records = filter(lambda x: x[0] in options["validset"], records)
    rnkset = {}
    for key, value in records:
        if kb_data.has_key(value):
            if not rnkset.has_key(key):
                rnkset[key] = float(kb_data[value])
            else:
                # NOTE(review): rnkset[key] is a float, so kb_data.has_key
                # (string keys) should never match and the [1] indexing on
                # a float would raise if it did — this branch looks dead;
                # confirm before relying on it.
                if kb_data.has_key(rnkset[key]) and float(kb_data[value]) > float((rnkset[key])[1]):
                    rnkset[key] = float(kb_data[value])
        else:
            rnkset[key] = 0
    write_message("Number of records available in rank method: %s" % len(rnkset))
    return rnkset
def get_lastupdated(rank_method_code):
    """Get the last time the rank method was updated"""
    rows = run_sql("SELECT rnkMETHOD.last_updated FROM rnkMETHOD WHERE name=%s", (rank_method_code, ))
    if not rows:
        # Unknown method: most likely it has never been run/registered.
        raise Exception("Is this the first run? Please do a complete update.")
    return rows[0][0]
def intoDB(dict, date, rank_method_code):
    """Insert the rank method data into the database.

    Replaces the rnkMETHODDATA row for the method with the marshalled
    *dict* and, when *date* is truthy, bumps the method's last_updated
    timestamp. NOTE: the `dict` parameter shadows the builtin.
    """
    mid = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
    del_rank_method_codeDATA(rank_method_code)
    serdata = serialize_via_marshal(dict);
    midstr = str(mid[0][0]);
    run_sql("INSERT INTO rnkMETHODDATA(id_rnkMETHOD, relevance_data) VALUES (%s,%s)", (midstr, serdata,))
    if date:
        run_sql("UPDATE rnkMETHOD SET last_updated=%s WHERE name=%s", (date, rank_method_code))
def fromDB(rank_method_code):
    """Get the data for a rank method.

    Returns the unmarshalled {recID: weight} dict stored for the method,
    or an empty dict when no data row exists."""
    method_id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
    rows = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (method_id[0][0], ))
    if not rows:
        return {}
    return deserialize_via_marshal(rows[0][0])
def del_rank_method_codeDATA(rank_method_code):
    """Delete the data for a rank method"""
    # Look up the method id, then remove its rnkMETHODDATA row.
    method_id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))[0][0]
    run_sql("DELETE FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (method_id, ))
def del_recids(rank_method_code, range_rec):
    """Delete some records from the rank method.

    *range_rec* is a list of (low, high) recID pairs; the matching
    entries are removed from the stored weight dict, which is then
    written back with a fresh timestamp.
    """
    id = run_sql("SELECT id from rnkMETHOD where name=%s", (rank_method_code, ))
    res = run_sql("SELECT relevance_data FROM rnkMETHODDATA WHERE id_rnkMETHOD=%s", (id[0][0], ))
    if res:
        rec_dict = deserialize_via_marshal(res[0][0])
        write_message("Old size: %s" % len(rec_dict))
        for (recids, recide) in range_rec:
            # NOTE: range() upper bound is exclusive, so `recide` itself
            # is not deleted (pre-existing behaviour).
            for i in range(int(recids), int(recide)):
                if rec_dict.has_key(i):
                    del rec_dict[i]
        write_message("New size: %s" % len(rec_dict))
        begin_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        intoDB(rec_dict, begin_date, rank_method_code)
    else:
        write_message("Create before deleting!")
def union_dicts(dict1, dict2):
    "Returns union of the two dicts."
    # Entries from dict2 win on key collisions, exactly as in the
    # original copy-then-overwrite iteration order.
    union_dict = {}
    union_dict.update(dict1)
    union_dict.update(dict2)
    return union_dict
def rank_method_code_statistics(rank_method_code):
    """Print statistics.

    Reports min/max weight, their frequencies, and a rough 10-bucket
    histogram of the method's stored weights via write_message.
    """
    method = fromDB(rank_method_code)
    # NOTE(review): min/max start as (str, number) tuples and are then
    # overwritten with plain values; the initial tuple-vs-number
    # comparisons rely on Python 2's arbitrary cross-type ordering —
    # confirm the sentinels behave as intended.
    max = ('', -999999)
    maxcount = 0
    min = ('', 999999)
    mincount = 0
    for (recID, value) in method.iteritems():
        if value < min and value > 0:
            min = value
        if value > max:
            max = value
    for (recID, value) in method.iteritems():
        if value == min:
            mincount += 1
        if value == max:
            maxcount += 1
    write_message("Showing statistic for selected method")
    write_message("Method name: %s" % getName(rank_method_code))
    write_message("Short name: %s" % rank_method_code)
    write_message("Last run: %s" % get_lastupdated(rank_method_code))
    write_message("Number of records: %s" % len(method))
    write_message("Lowest value: %s - Number of records: %s" % (min, mincount))
    write_message("Highest value: %s - Number of records: %s" % (max, maxcount))
    write_message("Divided into 10 sets:")
    for i in range(1, 11):
        setcount = 0
        distinct_values = {}
        # Bucket i covers [lower, upper] of the weight range.
        lower = -1.0 + ((float(max + 1) / 10)) * (i - 1)
        upper = -1.0 + ((float(max + 1) / 10)) * i
        for (recID, value) in method.iteritems():
            if value >= lower and value <= upper:
                setcount += 1
                distinct_values[value] = 1
        write_message("Set %s (%s-%s) %s Distinct values: %s" % (i, lower, upper, len(distinct_values), setcount))
def check_method(rank_method_code):
    """Report whether the rank method's stored data needs an update."""
    write_message("Checking rank method...")
    if not fromDB(rank_method_code):
        # no data at all yet: the method has never been run
        write_message("Rank method not yet executed, please run it to create the necessary data.")
        return
    if add_recIDs_by_date(rank_method_code):
        write_message("Records modified, update recommended")
    else:
        write_message("No records modified, update not necessary")
def bibrank_engine(run):
"""Run the indexing task.
Return 1 in case of success and 0 in case of failure.
"""
startCreate = time.time()
try:
options["run"] = []
options["run"].append(run)
for rank_method_code in options["run"]:
task_sleep_now_if_required(can_stop_too=True)
cfg_name = getName(rank_method_code)
write_message("Running rank method: %s." % cfg_name)
file = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
config = ConfigParser.ConfigParser()
try:
config.readfp(open(file))
except StandardError, e:
write_message("Cannot find configurationfile: %s" % file, sys.stderr)
raise StandardError
cfg_short = rank_method_code
cfg_function = config.get("rank_method", "function") + "_exec"
cfg_repair_function = config.get("rank_method", "function") + "_repair_exec"
cfg_name = getName(cfg_short)
options["validset"] = get_valid_range(rank_method_code)
if task_get_option("collection"):
l_of_colls = string.split(task_get_option("collection"), ", ")
recIDs = perform_request_search(c=l_of_colls)
recIDs_range = []
for recID in recIDs:
recIDs_range.append([recID, recID])
options["recid_range"] = recIDs_range
elif task_get_option("id"):
options["recid_range"] = task_get_option("id")
elif task_get_option("modified"):
options["recid_range"] = add_recIDs_by_date(rank_method_code, task_get_option("modified"))
elif task_get_option("last_updated"):
options["recid_range"] = add_recIDs_by_date(rank_method_code)
else:
write_message("No records specified, updating all", verbose=2)
min_id = run_sql("SELECT min(id) from bibrec")[0][0]
max_id = run_sql("SELECT max(id) from bibrec")[0][0]
options["recid_range"] = [[min_id, max_id]]
if task_get_option("quick") == "no":
write_message("Recalculate parameter not used, parameter ignored.", verbose=9)
if task_get_option("cmd") == "del":
del_recids(cfg_short, options["recid_range"])
elif task_get_option("cmd") == "add":
func_object = globals().get(cfg_function)
func_object(rank_method_code, cfg_name, config)
elif task_get_option("cmd") == "stat":
rank_method_code_statistics(rank_method_code)
elif task_get_option("cmd") == "check":
check_method(rank_method_code)
elif task_get_option("cmd") == "print-missing":
func_object = globals().get(cfg_function)
func_object(rank_method_code, cfg_name, config)
elif task_get_option("cmd") == "repair":
func_object = globals().get(cfg_repair_function)
func_object()
else:
write_message("Invalid command found processing %s" % rank_method_code, sys.stderr)
raise StandardError
except StandardError, e:
write_message("\nException caught: %s" % e, sys.stderr)
write_message(traceback.format_exc()[:-1])
register_exception()
raise StandardError
if task_get_option("verbose"):
showtime((time.time() - startCreate))
return 1
def get_valid_range(rank_method_code):
    """Return an intbitset of the recIDs belonging to the collections
    for which the given rank method is enabled."""
    write_message("Getting records from collections enabled for rank method.", verbose=9)
    res = run_sql("SELECT collection.name FROM collection, collection_rnkMETHOD, rnkMETHOD WHERE collection.id=id_collection and id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s", (rank_method_code, ))
    enabled_colls = [row[0] for row in res]
    if enabled_colls:
        recIDs = perform_request_search(c=enabled_colls)
    else:
        recIDs = []
    valid = intbitset()
    valid += recIDs
    return valid
def add_recIDs_by_date(rank_method_code, dates=""):
    """Return recID range list for records modified between DATES[0] and DATES[1].

    If DATES is not set, then use records modified since the last run of
    the ranking method RANK_METHOD_CODE.  Returns the list produced by
    create_range_list (empty when nothing changed).

    Fix: removed a stray ``\"\"\"`` after the ORDER BY clause, which was a
    leftover empty string literal silently concatenated to the query.
    """
    if not dates:
        try:
            dates = (get_lastupdated(rank_method_code), '')
        except Exception:
            # method has no recorded last run: fall back to "everything"
            dates = ("0000-00-00 00:00:00", '')
    if dates[0] is None:
        dates = ("0000-00-00 00:00:00", '')
    query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s"""
    if dates[1]:
        query += " and b.modification_date <= %s"
    query += " ORDER BY b.id ASC"
    if dates[1]:
        res = run_sql(query, (dates[0], dates[1]))
    else:
        res = run_sql(query, (dates[0], ))
    alist = create_range_list([row[0] for row in res])
    if not alist:
        write_message("No new records added since last time method was run")
    return alist
def getName(rank_method_code, ln=CFG_SITE_LANG, type='ln'):
    """Return the display name of the rank method, if it exists.

    Looks the name up in rnkMETHODNAME for the requested language,
    falling back to the site language, and finally to the raw method
    code itself.  Raises Exception when the method code is unknown.
    """
    try:
        rnkid = run_sql("SELECT id FROM rnkMETHOD where name=%s", (rank_method_code, ))
        if not rnkid:
            # unknown method code: handled by the except clause below
            raise Exception
        rnkid = str(rnkid[0][0])
        res = run_sql("SELECT value FROM rnkMETHODNAME where type=%s and ln=%s and id_rnkMETHOD=%s", (type, ln, rnkid))
        if not res:
            res = run_sql("SELECT value FROM rnkMETHODNAME WHERE ln=%s and id_rnkMETHOD=%s and type=%s", (CFG_SITE_LANG, rnkid, type))
        if not res:
            return rank_method_code
        return res[0][0]
    except Exception:
        write_message("Cannot run rank method, either given code for method is wrong, or it has not been added using the webinterface.")
        raise Exception
def single_tag_rank_method(run):
    """Task entry point for single-tag rank methods; delegates to bibrank_engine."""
    return bibrank_engine(run)
def showtime(timeused):
    """Log the wall-clock time (in seconds) spent by the method."""
    write_message("Time used: %d second(s)." % timeused, verbose=9)
def citation(run):
    """Task entry point for the citation rank method; delegates to bibrank_engine."""
    return bibrank_engine(run)
# Hack: index-based sorting lives here, but it is very similar to the
# tag-based method and should reuse a lot of this code, so it is better
# kept here than in a separate module.
#
def index_term_count_exec(rank_method_code, name, config):
    """Creating the rank method data"""
    write_message("Recreating index weighting data")
    started_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # we must recalculate these every time for all records, since the
    # weighting of a record is determined by the index entries of _other_
    # records
    weights = calculate_index_term_count(config)
    intoDB(weights, started_at, rank_method_code)
def calculate_index_term_count(config):
    """Calculate the weight of a record set based on number of entries of a
    tag from the record in another index...useful for authority files.

    CONFIG is the rank method's ConfigParser; it must contain an
    [index_term_count] section with index_table_name and
    index_term_value_from_tag.  Returns a dict {recID: hit count}.

    Fixes: the error path used ``"..." + config`` which raises TypeError
    (config is a ConfigParser instance, not a string) and was followed by
    an unreachable ``return()``; both replaced with a proper message.
    """
    if config.has_section("index_term_count"):
        index = config.get("index_term_count", "index_table_name")
        tag = config.get("index_term_count", "index_term_value_from_tag")
        # check against possible SQL injection:
        dummy = get_table_update_time(index)
        tag = wash_table_column_name(tag)
    else:
        raise Exception("Config file %s does not have index_term_count section" % config)
    task_sleep_now_if_required(can_stop_too=True)
    write_message("......Processing all records")
    query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \
            (tag[0:2], tag[0:2])  # we checked that tag is safe
    records = list(run_sql(query, (tag,)))
    write_message("Number of records found with the necessary tags: %s" % len(records))
    rnkset = {}
    for key, value in records:
        hits = 0
        if len(value):
            query = "SELECT hitlist from %s where term = %%s" % index  # we checked that index is a table
            row = run_sql(query, (value,))
            if row and row[0] and row[0][0]:
                # has to be prepared for corrupted data!
                try:
                    hits = len(intbitset(row[0][0]))
                except Exception:
                    hits = 0
        rnkset[key] = hits
    write_message("Number of records available in rank method: %s" % len(rnkset))
    return rnkset
def index_term_count(run):
    """Task entry point for the index-term-count rank method; delegates to bibrank_engine."""
    return bibrank_engine(run)
diff --git a/invenio/legacy/bibrank/word_indexer.py b/invenio/legacy/bibrank/word_indexer.py
index d53548c0d..80baf6951 100644
--- a/invenio/legacy/bibrank/word_indexer.py
+++ b/invenio/legacy/bibrank/word_indexer.py
@@ -1,1195 +1,1195 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
import sys
import time
import urllib
import math
import re
import ConfigParser
from invenio.config import \
CFG_SITE_LANG, \
CFG_ETCDIR
from invenio.search_engine import perform_request_search, wash_index_term
from invenio.dbquery import run_sql, DatabaseError, serialize_via_marshal, deserialize_via_marshal
from invenio.bibindex_engine_stemmer import is_stemmer_available_for_language, stem
from invenio.bibindex_engine_stopwords import is_stopword
from invenio.bibindex_engine import beautify_range_list, \
kill_sleepy_mysql_threads, create_range_list
from invenio.bibtask import write_message, task_get_option, task_update_progress, \
task_update_status, task_sleep_now_if_required
from invenio.intbitset import intbitset
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.utils.text import strip_accents
# Module-level task state shared by the functions below (run list,
# validset, current_run, modified_words, ...); populated by word_index().
options = {} # global variable to hold task options

## safety parameters concerning DB thread-multiplication problem:
CFG_CHECK_MYSQL_THREADS = 0 # to check or not to check the problem?
CFG_MAX_MYSQL_THREADS = 50 # how many threads (connections) we consider as still safe
CFG_MYSQL_THREAD_TIMEOUT = 20 # we'll kill threads that were sleeping for more than X seconds
## override urllib's default password-asking behaviour:
class MyFancyURLopener(urllib.FancyURLopener):
    """URL opener that never prompts interactively.

    Supplies dummy credentials instead of asking the user, and turns
    HTTP 401 responses into a plain IOError.
    """

    def prompt_user_passwd(self, host, realm):
        # supply some dummy credentials by default
        return ("mysuperuser", "mysuperpass")

    def http_error_401(self, url, fp, errcode, errmsg, headers):
        # do not bother with protected pages
        raise IOError(999, 'unauthorized access')
#urllib._urlopener = MyFancyURLopener()

nb_char_in_line = 50 # for verbose pretty printing
chunksize = 1000 # default size of chunks that the records will be treated by
base_process_size = 4500 # process base size
## Dictionary merging functions
def dict_union(list1, list2):
    """Merge two term -> (weight, flag) dicts.

    Terms present in only one input are copied unchanged.  For terms in
    both, the weights (first tuple elements) are summed and the flag is
    taken from list2.
    """
    merged = dict(list1)
    for term, count in list2.items():
        if term in merged:
            merged[term] = (merged[term][0] + count[0], count[1])
        else:
            merged[term] = count
    return merged
# tagToFunctions mapping. It offers an indirection level necessary for
# indexing fulltext. The default is get_words_from_phrase
tagToWordsFunctions = {}
def get_words_from_phrase(phrase, weight, lang="",
                          chars_punctuation=r"[\.\,\:\;\?\!\"]",
                          chars_alphanumericseparators=r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]",
                          split=str.split):
    """Return a dict of words extracted from PHRASE, mapped to
    (accumulated weight, 0) tuples.

    The phrase is accent-stripped, lower-cased, cleaned of common HTML
    entities/markup and http links, then split on whitespace after
    punctuation removal.  Depending on the global ``options`` the words
    are stopword-filtered and stemmed (when LANG is set and not "none").

    NOTE(review): when options["remove_stopword"] != "True" neither
    branch below fires, so no words are collected at all — confirm this
    is intended.  ``check_term`` is defined elsewhere in the project.
    """
    words = {}
    phrase = strip_accents(phrase)
    phrase = phrase.lower()
    #Getting rid of strange characters
    phrase = re.sub("&eacute;", 'e', phrase)
    phrase = re.sub("&egrave;", 'e', phrase)
    phrase = re.sub("&agrave;", 'a', phrase)
    phrase = re.sub("&nbsp;", ' ', phrase)
    phrase = re.sub("&laquo;", ' ', phrase)
    phrase = re.sub("&raquo;", ' ', phrase)
    phrase = re.sub("&ecirc;", ' ', phrase)
    phrase = re.sub("&amp;", ' ', phrase)
    if phrase.find("</") > -1:
        #Most likely html, remove html code
        phrase = re.sub("(?s)<[^>]*>|&#?\w+;", ' ', phrase)
    #removes http links
    phrase = re.sub("(?s)http://[^( )]*", '', phrase)
    phrase = re.sub(chars_punctuation, ' ', phrase)
    #By doing this like below, characters standing alone, like c a b is not added to the inedx, but when they are together with characters like c++ or c$ they are added.
    for word in split(phrase):
        if options["remove_stopword"] == "True" and not is_stopword(word) and check_term(word, 0):
            # word passed the term check: optionally stem, then accumulate weight
            if lang and lang !="none" and options["use_stemming"]:
                word = stem(word, lang)
                if not words.has_key(word):
                    words[word] = (0, 0)
            else:
                if not words.has_key(word):
                    words[word] = (0, 0)
            words[word] = (words[word][0] + weight, 0)
        elif options["remove_stopword"] == "True" and not is_stopword(word):
            # word failed check_term: re-split it on digits/symbols
            # (so "c++" contributes "c") and index the fragments
            phrase = re.sub(chars_alphanumericseparators, ' ', word)
            for word_ in split(phrase):
                if lang and lang !="none" and options["use_stemming"]:
                    word_ = stem(word_, lang)
                if word_:
                    if not words.has_key(word_):
                        words[word_] = (0,0)
                    words[word_] = (words[word_][0] + weight, 0)
    return words
class WordTable:
    "A class to hold the words table."

    def __init__(self, tablename, fields_to_index, separators="[^\s]"):
        "Creates words table instance."
        self.tablename = tablename            # forward table, e.g. rnkWORD01F; reverse table is tablename[:-1]+"R"
        self.recIDs_in_mem = []               # list of [low, high] recID ranges currently held in memory
        self.fields_to_index = fields_to_index  # list of (tag, weight, lang) triples
        self.separators = separators
        self.value = {}                       # in-memory index: {word: {recID: (weight, flag)}}

    def get_field(self, recID, tag):
        """Returns list of values of the MARC-21 'tag' fields for the
        record 'recID'."""
        out = []
        bibXXx = "bib" + tag[0] + tag[1] + "x"
        bibrec_bibXXx = "bibrec_" + bibXXx
        query = """SELECT value FROM %s AS b, %s AS bb
                WHERE bb.id_bibrec=%s AND bb.id_bibxxx=b.id
                AND tag LIKE '%s'""" % (bibXXx, bibrec_bibXXx, recID, tag);
        res = run_sql(query)
        for row in res:
            out.append(row[0])
        return out

    def clean(self):
        "Cleans the words table."
        self.value={}

    def put_into_db(self, mode="normal"):
        """Updates the current words table in the corresponding DB
        rnkWORD table. Mode 'normal' means normal execution,
        mode 'emergency' means words index reverting to old state.
        """
        write_message("%s %s wordtable flush started" % (self.tablename,mode))
        write_message('...updating %d words into %sR started' % \
            (len(self.value), self.tablename[:-1]))
        task_update_progress("%s flushed %d/%d words" % (self.tablename, 0, len(self.value)))
        self.recIDs_in_mem = beautify_range_list(self.recIDs_in_mem)
        # mark the reverse-table rows we are about to replace as TEMPORARY
        if mode == "normal":
            for group in self.recIDs_in_mem:
                query = """UPDATE %sR SET type='TEMPORARY' WHERE id_bibrec
                BETWEEN '%d' AND '%d' AND type='CURRENT'""" % \
                (self.tablename[:-1], group[0], group[1])
                write_message(query, verbose=9)
                run_sql(query)
        nb_words_total = len(self.value)
        nb_words_report = int(nb_words_total/10)
        nb_words_done = 0
        for word in self.value.keys():
            self.put_word_into_db(word, self.value[word])
            nb_words_done += 1
            if nb_words_report!=0 and ((nb_words_done % nb_words_report) == 0):
                write_message('......processed %d/%d words' % (nb_words_done, nb_words_total))
                task_update_progress("%s flushed %d/%d words" % (self.tablename, nb_words_done, nb_words_total))
        write_message('...updating %d words into %s ended' % \
            (nb_words_total, self.tablename), verbose=9)
        #if options["verbose"]:
        #    write_message('...updating reverse table %sR started' % self.tablename[:-1])
        if mode == "normal":
            # promote FUTURE rows to CURRENT, then drop the TEMPORARY backups
            for group in self.recIDs_in_mem:
                query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec
                BETWEEN '%d' AND '%d' AND type='FUTURE'""" % \
                (self.tablename[:-1], group[0], group[1])
                write_message(query, verbose=9)
                run_sql(query)
                query = """DELETE FROM %sR WHERE id_bibrec
                BETWEEN '%d' AND '%d' AND type='TEMPORARY'""" % \
                (self.tablename[:-1], group[0], group[1])
                write_message(query, verbose=9)
                run_sql(query)
            write_message('End of updating wordTable into %s' % self.tablename, verbose=9)
        elif mode == "emergency":
            # revert: restore TEMPORARY rows to CURRENT, discard FUTURE rows
            write_message("emergency")
            for group in self.recIDs_in_mem:
                query = """UPDATE %sR SET type='CURRENT' WHERE id_bibrec
                BETWEEN '%d' AND '%d' AND type='TEMPORARY'""" % \
                (self.tablename[:-1], group[0], group[1])
                write_message(query, verbose=9)
                run_sql(query)
                query = """DELETE FROM %sR WHERE id_bibrec
                BETWEEN '%d' AND '%d' AND type='FUTURE'""" % \
                (self.tablename[:-1], group[0], group[1])
                write_message(query, verbose=9)
                run_sql(query)
            write_message('End of emergency flushing wordTable into %s' % self.tablename, verbose=9)
        #if options["verbose"]:
        #    write_message('...updating reverse table %sR ended' % self.tablename[:-1])
        self.clean()
        self.recIDs_in_mem = []
        write_message("%s %s wordtable flush ended" % (self.tablename, mode))
        task_update_progress("%s flush ended" % (self.tablename))

    def load_old_recIDs(self,word):
        """Load existing hitlist for the word from the database index files."""
        query = "SELECT hitlist FROM %s WHERE term=%%s" % self.tablename
        res = run_sql(query, (word,))
        if res:
            return deserialize_via_marshal(res[0][0])
        else:
            return None

    def merge_with_old_recIDs(self,word,recIDs, set):
        """Merge the system numbers stored in memory (hash of recIDs with value[0] > 0 or -1
        according to whether to add/delete them) with those stored in the database index
        and received in set universe of recIDs for the given word.
        Return 0 in case no change was done to SET, return 1 in case SET was changed.
        """
        set_changed_p = 0
        for recID,sign in recIDs.iteritems():
            if sign[0] == -1 and set.has_key(recID):
                # delete recID if existent in set and if marked as to be deleted
                del set[recID]
                set_changed_p = 1
            elif sign[0] > -1 and not set.has_key(recID):
                # add recID if not existent in set and if marked as to be added
                set[recID] = sign
                set_changed_p = 1
            elif sign[0] > -1 and sign[0] != set[recID][0]:
                # weight changed: overwrite the stored entry
                set[recID] = sign
                set_changed_p = 1
        return set_changed_p

    def put_word_into_db(self, word, recIDs, split=str.split):
        """Flush a single word to the database and delete it from memory"""
        set = self.load_old_recIDs(word)
        #write_message("%s %s" % (word, self.value[word]))
        if set is not None: # merge the word recIDs found in memory:
            options["modified_words"][word] = 1
            if not self.merge_with_old_recIDs(word, recIDs, set):
                # nothing to update:
                write_message("......... unchanged hitlist for ``%s''" % word, verbose=9)
                pass
            else:
                # yes there were some new words:
                write_message("......... updating hitlist for ``%s''" % word, verbose=9)
                run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % self.tablename,
                        (serialize_via_marshal(set), word))
        else: # the word is new, will create new set:
            write_message("......... inserting hitlist for ``%s''" % word, verbose=9)
            set = self.value[word]
            if len(set) > 0:
                #new word, add to list
                options["modified_words"][word] = 1
                try:
                    run_sql("INSERT INTO %s (term, hitlist) VALUES (%%s, %%s)" % self.tablename,
                            (word, serialize_via_marshal(set)))
                except Exception, e:
                    ## FIXME: This is for debugging encoding errors
                    register_exception(prefix="Error when putting the term '%s' into db (hitlist=%s): %s\n" % (repr(word), set, e), alert_admin=True)
        if not set: # never store empty words
            run_sql("DELETE from %s WHERE term=%%s" % self.tablename,
                    (word,))
        del self.value[word]

    def display(self):
        "Displays the word table."
        keys = self.value.keys()
        keys.sort()
        for k in keys:
            write_message("%s: %s" % (k, self.value[k]))

    def count(self):
        "Returns the number of words in the table."
        return len(self.value)

    def info(self):
        "Prints some information on the words table."
        write_message("The words table contains %d words." % self.count())

    def lookup_words(self, word=""):
        "Lookup word from the words table."
        if not word:
            done = 0
            while not done:
                try:
                    word = raw_input("Enter word: ")
                    done = 1
                except (EOFError, KeyboardInterrupt):
                    return
        if self.value.has_key(word):
            write_message("The word '%s' is found %d times." \
                % (word, len(self.value[word])))
        else:
            write_message("The word '%s' does not exist in the word file."\
                % word)

    def update_last_updated(self, rank_method_code, starting_time=None):
        """Update last_updated column of the index table in the database.
        Puts starting time there so that if the task was interrupted for record download,
        the records will be reindexed next time."""
        if starting_time is None:
            return None
        write_message("updating last_updated to %s..." % starting_time, verbose=9)
        return run_sql("UPDATE rnkMETHOD SET last_updated=%s WHERE name=%s",
                       (starting_time, rank_method_code,))

    def add_recIDs(self, recIDs):
        """Fetches records which id in the recIDs arange list and adds
        them to the wordTable. The recIDs arange list is of the form:
        [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].
        """
        global chunksize
        flush_count = 0
        records_done = 0
        records_to_go = 0
        for arange in recIDs:
            records_to_go = records_to_go + arange[1] - arange[0] + 1
        time_started = time.time() # will measure profile time
        for arange in recIDs:
            i_low = arange[0]
            chunksize_count = 0
            while i_low <= arange[1]:
                # calculate chunk group of recIDs and treat it:
                i_high = min(i_low+task_get_option("flush")-flush_count-1,arange[1])
                i_high = min(i_low+chunksize-chunksize_count-1, i_high)
                try:
                    self.chk_recID_range(i_low, i_high)
                except StandardError, e:
                    write_message("Exception caught: %s" % e, sys.stderr)
                    register_exception()
                    task_update_status("ERROR")
                    sys.exit(1)
                write_message("%s adding records #%d-#%d started" % \
                    (self.tablename, i_low, i_high))
                if CFG_CHECK_MYSQL_THREADS:
                    kill_sleepy_mysql_threads()
                task_update_progress("%s adding recs %d-%d" % (self.tablename, i_low, i_high))
                self.del_recID_range(i_low, i_high)
                just_processed = self.add_recID_range(i_low, i_high)
                flush_count = flush_count + i_high - i_low + 1
                chunksize_count = chunksize_count + i_high - i_low + 1
                records_done = records_done + just_processed
                write_message("%s adding records #%d-#%d ended " % \
                    (self.tablename, i_low, i_high))
                if chunksize_count >= chunksize:
                    chunksize_count = 0
                # flush if necessary:
                if flush_count >= task_get_option("flush"):
                    self.put_into_db()
                    self.clean()
                    write_message("%s backing up" % (self.tablename))
                    flush_count = 0
                    self.log_progress(time_started,records_done,records_to_go)
                # iterate:
                i_low = i_high + 1
        if flush_count > 0:
            self.put_into_db()
            self.log_progress(time_started,records_done,records_to_go)

    def add_recIDs_by_date(self, dates=""):
        """Add recIDs modified between DATES[0] and DATES[1].
        If DATES is not set, then add records modified since the last run of
        the ranking method.
        """
        if not dates:
            write_message("Using the last update time for the rank method")
            query = """SELECT last_updated FROM rnkMETHOD WHERE name='%s'
            """ % options["current_run"]
            res = run_sql(query)
            if not res:
                return
            if not res[0][0]:
                dates = ("0000-00-00",'')
            else:
                dates = (res[0][0],'')
        query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >=
        '%s'""" % dates[0]
        if dates[1]:
            query += "and b.modification_date <= '%s'" % dates[1]
        # NOTE(review): the trailing "" below is an empty string literal
        # concatenated by the parser -- harmless, but looks like a typo.
        query += " ORDER BY b.id ASC"""
        res = run_sql(query)
        alist = create_range_list([row[0] for row in res])
        if not alist:
            write_message( "No new records added. %s is up to date" % self.tablename)
        else:
            self.add_recIDs(alist)
        return alist

    def add_recID_range(self, recID1, recID2):
        """Add records from RECID1 to RECID2."""
        wlist = {}
        normalize = {}
        self.recIDs_in_mem.append([recID1,recID2])
        # secondly fetch all needed tags:
        for (tag, weight, lang) in self.fields_to_index:
            if tag in tagToWordsFunctions.keys():
                get_words_function = tagToWordsFunctions[tag]
            else:
                get_words_function = get_words_from_phrase
            bibXXx = "bib" + tag[0] + tag[1] + "x"
            bibrec_bibXXx = "bibrec_" + bibXXx
            query = """SELECT bb.id_bibrec,b.value FROM %s AS b, %s AS bb
                    WHERE bb.id_bibrec BETWEEN %d AND %d
                    AND bb.id_bibxxx=b.id AND tag LIKE '%s'""" % (bibXXx, bibrec_bibXXx, recID1, recID2, tag)
            res = run_sql(query)
            nb_total_to_read = len(res)
            verbose_idx = 0 # for verbose pretty printing
            for row in res:
                recID, phrase = row
                if recID in options["validset"]:
                    if not wlist.has_key(recID): wlist[recID] = {}
                    new_words = get_words_function(phrase, weight, lang) # ,self.separators
                    wlist[recID] = dict_union(new_words,wlist[recID])
        # were there some words for these recIDs found?
        if len(wlist) == 0: return 0
        recIDs = wlist.keys()
        for recID in recIDs:
            # was this record marked as deleted?
            if "DELETED" in self.get_field(recID, "980__c"):
                wlist[recID] = {}
                write_message("... record %d was declared deleted, removing its word list" % recID, verbose=9)
            write_message("... record %d, termlist: %s" % (recID, wlist[recID]), verbose=9)
        # put words into reverse index table with FUTURE status:
        for recID in recIDs:
            run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'FUTURE')" % self.tablename[:-1],
                    (recID, serialize_via_marshal(wlist[recID])))
            # ... and, for new records, enter the CURRENT status as empty:
            try:
                run_sql("INSERT INTO %sR (id_bibrec,termlist,type) VALUES (%%s,%%s,'CURRENT')" % self.tablename[:-1],
                        (recID, serialize_via_marshal([])))
            except DatabaseError:
                # okay, it's an already existing record, no problem
                pass
        # put words into memory word list:
        put = self.put
        for recID in recIDs:
            for (w, count) in wlist[recID].iteritems():
                put(recID, w, count)
        return len(recIDs)

    def log_progress(self, start, done, todo):
        """Calculate progress and store it.
        start: start time,
        done: records processed,
        todo: total number of records"""
        time_elapsed = time.time() - start
        # consistency check
        if time_elapsed == 0 or done > todo:
            return
        time_recs_per_min = done/(time_elapsed/60.0)
        write_message("%d records took %.1f seconds to complete.(%1.f recs/min)"\
            % (done, time_elapsed, time_recs_per_min))
        if time_recs_per_min:
            write_message("Estimated runtime: %.1f minutes" % \
                ((todo-done)/time_recs_per_min))

    def put(self, recID, word, sign):
        "Adds/deletes a word to the word list."
        try:
            word = wash_index_term(word)
            if self.value.has_key(word):
                # the word 'word' exist already: update sign
                self.value[word][recID] = sign
                # PROBLEM ?
            else:
                self.value[word] = {recID: sign}
        except:
            write_message("Error: Cannot put word %s with sign %d for recID %s." % (word, sign, recID))

    def del_recIDs(self, recIDs):
        """Fetches records which id in the recIDs range list and adds
        them to the wordTable. The recIDs range list is of the form:
        [[i1_low,i1_high],[i2_low,i2_high], ..., [iN_low,iN_high]].
        """
        count = 0
        for range in recIDs:
            self.del_recID_range(range[0],range[1])
            count = count + range[1] - range[0]
        self.put_into_db()

    def del_recID_range(self, low, high):
        """Deletes records with 'recID' system number between low
        and high from memory words index table."""
        write_message("%s fetching existing words for records #%d-#%d started" % \
            (self.tablename, low, high), verbose=3)
        self.recIDs_in_mem.append([low,high])
        query = """SELECT id_bibrec,termlist FROM %sR as bb WHERE bb.id_bibrec
        BETWEEN '%d' AND '%d'""" % (self.tablename[:-1], low, high)
        recID_rows = run_sql(query)
        for recID_row in recID_rows:
            recID = recID_row[0]
            wlist = deserialize_via_marshal(recID_row[1])
            for word in wlist:
                # sign (-1, 0) marks the word for deletion on flush
                self.put(recID, word, (-1, 0))
        write_message("%s fetching existing words for records #%d-#%d ended" % \
            (self.tablename, low, high), verbose=3)

    def report_on_table_consistency(self):
        """Check reverse words index tables (e.g. rnkWORD01R) for
        interesting states such as 'TEMPORARY' state.
        Prints small report (no of words, no of bad words).
        """
        # find number of words:
        query = """SELECT COUNT(*) FROM %s""" % (self.tablename)
        res = run_sql(query, None, 1)
        if res:
            nb_words = res[0][0]
        else:
            nb_words = 0
        # find number of records:
        query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1])
        res = run_sql(query, None, 1)
        if res:
            nb_records = res[0][0]
        else:
            nb_records = 0
        # report stats:
        write_message("%s contains %d words from %d records" % (self.tablename, nb_words, nb_records))
        # find possible bad states in reverse tables:
        query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1])
        res = run_sql(query)
        if res:
            nb_bad_records = res[0][0]
        else:
            nb_bad_records = 999999999
        if nb_bad_records:
            write_message("EMERGENCY: %s needs to repair %d of %d index records" % \
                (self.tablename, nb_bad_records, nb_records))
        else:
            write_message("%s is in consistent state" % (self.tablename))
        return nb_bad_records

    def repair(self):
        """Repair the whole table"""
        # find possible bad states in reverse tables:
        query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR WHERE type <> 'CURRENT'""" % (self.tablename[:-1])
        res = run_sql(query, None, 1)
        if res:
            nb_bad_records = res[0][0]
        else:
            nb_bad_records = 0
        # find number of records:
        query = """SELECT COUNT(DISTINCT(id_bibrec)) FROM %sR""" % (self.tablename[:-1])
        res = run_sql(query)
        if res:
            nb_records = res[0][0]
        else:
            nb_records = 0
        if nb_bad_records == 0:
            return
        query = """SELECT id_bibrec FROM %sR WHERE type <> 'CURRENT' ORDER BY id_bibrec""" \
                % (self.tablename[:-1])
        res = run_sql(query)
        recIDs = create_range_list([row[0] for row in res])
        flush_count = 0
        records_done = 0
        records_to_go = 0
        for range in recIDs:
            records_to_go = records_to_go + range[1] - range[0] + 1
        time_started = time.time() # will measure profile time
        for range in recIDs:
            i_low = range[0]
            chunksize_count = 0
            while i_low <= range[1]:
                # calculate chunk group of recIDs and treat it:
                i_high = min(i_low+task_get_option("flush")-flush_count-1,range[1])
                i_high = min(i_low+chunksize-chunksize_count-1, i_high)
                try:
                    self.fix_recID_range(i_low, i_high)
                except StandardError, e:
                    write_message("Exception caught: %s" % e, sys.stderr)
                    register_exception()
                    task_update_status("ERROR")
                    sys.exit(1)
                flush_count = flush_count + i_high - i_low + 1
                chunksize_count = chunksize_count + i_high - i_low + 1
                records_done = records_done + i_high - i_low + 1
                if chunksize_count >= chunksize:
                    chunksize_count = 0
                # flush if necessary:
                if flush_count >= task_get_option("flush"):
                    self.put_into_db("emergency")
                    self.clean()
                    flush_count = 0
                    self.log_progress(time_started,records_done,records_to_go)
                # iterate:
                i_low = i_high + 1
        if flush_count > 0:
            self.put_into_db("emergency")
            self.log_progress(time_started,records_done,records_to_go)
        write_message("%s inconsistencies repaired." % self.tablename)

    def chk_recID_range(self, low, high):
        """Check if the reverse index table is in proper state"""
        ## check db
        query = """SELECT COUNT(*) FROM %sR WHERE type <> 'CURRENT'
        AND id_bibrec BETWEEN '%d' AND '%d'""" % (self.tablename[:-1], low, high)
        res = run_sql(query, None, 1)
        if res[0][0]==0:
            write_message("%s for %d-%d is in consistent state"%(self.tablename,low,high))
            return # okay, words table is consistent
        ## inconsistency detected!
        write_message("EMERGENCY: %s inconsistencies detected..." % self.tablename)
        write_message("""EMERGENCY: Errors found. You should check consistency of the %s - %sR tables.\nRunning 'bibrank --repair' is recommended.""" \
            % (self.tablename, self.tablename[:-1]))
        raise StandardError

    def fix_recID_range(self, low, high):
        """Try to fix reverse index database consistency (e.g. table rnkWORD01R) in the low,high doc-id range.
        Possible states for a recID follow:
        CUR TMP FUT: very bad things have happened: warn!
        CUR TMP    : very bad things have happened: warn!
        CUR     FUT: delete FUT (crash before flushing)
        CUR        : database is ok
            TMP FUT: add TMP to memory and del FUT from memory
                     flush (revert to old state)
            TMP    : very bad things have happened: warn!
                FUT: very bad things have happended: warn!
        """
        state = {}
        query = "SELECT id_bibrec,type FROM %sR WHERE id_bibrec BETWEEN '%d' AND '%d'"\
                % (self.tablename[:-1], low, high)
        res = run_sql(query)
        for row in res:
            if not state.has_key(row[0]):
                state[row[0]]=[]
            state[row[0]].append(row[1])
        ok = 1 # will hold info on whether we will be able to repair
        for recID in state.keys():
            if not 'TEMPORARY' in state[recID]:
                if 'FUTURE' in state[recID]:
                    if 'CURRENT' not in state[recID]:
                        write_message("EMERGENCY: Index record %d is in inconsistent state. Can't repair it" % recID)
                        ok = 0
                    else:
                        # CUR + FUT: a crash happened before flushing; drop the rows
                        write_message("EMERGENCY: Inconsistency in index record %d detected" % recID)
                        query = """DELETE FROM %sR
                        WHERE id_bibrec='%d'""" % (self.tablename[:-1], recID)
                        run_sql(query)
                        write_message("EMERGENCY: Inconsistency in index record %d repaired." % recID)
            else:
                if 'FUTURE' in state[recID] and not 'CURRENT' in state[recID]:
                    # TMP + FUT: revert by re-loading both into memory
                    self.recIDs_in_mem.append([recID,recID])
                    # Get the words file
                    query = """SELECT type,termlist FROM %sR
                    WHERE id_bibrec='%d'""" % (self.tablename[:-1], recID)
                    write_message(query, verbose=9)
                    res = run_sql(query)
                    for row in res:
                        wlist = deserialize_via_marshal(row[1])
                        write_message("Words are %s " % wlist, verbose=9)
                        if row[0] == 'TEMPORARY':
                            sign = 1
                        else:
                            sign = -1
                        for word in wlist:
                            self.put(recID, word, wlist[word])
                else:
                    write_message("EMERGENCY: %s for %d is in inconsistent state. Couldn't repair it." % (self.tablename, recID))
                    ok = 0
        if not ok:
            write_message("""EMERGENCY: Unrepairable errors found. You should check consistency
of the %s - %sR tables. Deleting affected TEMPORARY and FUTURE entries
from these tables is recommended; see the BibIndex Admin Guide.
(The repairing procedure is similar for bibrank word indexes.)""" % (self.tablename, self.tablename[:-1]))
            raise StandardError
def word_index(run):
"""Run the indexing task. The row argument is the BibSched task
queue row, containing if, arguments, etc.
Return 1 in case of success and 0 in case of failure.
"""
global languages
max_recid = 0
res = run_sql("SELECT max(id) FROM bibrec")
if res and res[0][0]:
max_recid = int(res[0][0])
options["run"] = []
options["run"].append(run)
for rank_method_code in options["run"]:
task_sleep_now_if_required(can_stop_too=True)
method_starting_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
write_message("Running rank method: %s" % getName(rank_method_code))
try:
file = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
config = ConfigParser.ConfigParser()
config.readfp(open(file))
except StandardError, e:
write_message("Cannot find configurationfile: %s" % file, sys.stderr)
raise StandardError
options["current_run"] = rank_method_code
options["modified_words"] = {}
options["table"] = config.get(config.get("rank_method", "function"), "table")
options["use_stemming"] = config.get(config.get("rank_method","function"),"stemming")
options["remove_stopword"] = config.get(config.get("rank_method","function"),"stopword")
tags = get_tags(config) #get the tags to include
options["validset"] = get_valid_range(rank_method_code) #get the records from the collections the method is enabled for
function = config.get("rank_method","function")
wordTable = WordTable(options["table"], tags)
wordTable.report_on_table_consistency()
try:
if task_get_option("cmd") == "del":
if task_get_option("id"):
wordTable.del_recIDs(task_get_option("id"))
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("collection"):
l_of_colls = task_get_option("collection").split(",")
recIDs = perform_request_search(c=l_of_colls)
recIDs_range = []
for recID in recIDs:
recIDs_range.append([recID,recID])
wordTable.del_recIDs(recIDs_range)
task_sleep_now_if_required(can_stop_too=True)
else:
write_message("Missing IDs of records to delete from index %s.", wordTable.tablename,
sys.stderr)
raise StandardError
elif task_get_option("cmd") == "add":
if task_get_option("id"):
wordTable.add_recIDs(task_get_option("id"))
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("collection"):
l_of_colls = task_get_option("collection").split(",")
recIDs = perform_request_search(c=l_of_colls)
recIDs_range = []
for recID in recIDs:
recIDs_range.append([recID,recID])
wordTable.add_recIDs(recIDs_range)
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("last_updated"):
wordTable.add_recIDs_by_date("")
# only update last_updated if run via automatic mode:
wordTable.update_last_updated(rank_method_code, method_starting_time)
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("modified"):
wordTable.add_recIDs_by_date(task_get_option("modified"))
task_sleep_now_if_required(can_stop_too=True)
else:
wordTable.add_recIDs([[0,max_recid]])
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("cmd") == "repair":
wordTable.repair()
check_rnkWORD(options["table"])
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("cmd") == "check":
check_rnkWORD(options["table"])
options["modified_words"] = {}
task_sleep_now_if_required(can_stop_too=True)
elif task_get_option("cmd") == "stat":
rank_method_code_statistics(options["table"])
task_sleep_now_if_required(can_stop_too=True)
else:
write_message("Invalid command found processing %s" % \
wordTable.tablename, sys.stderr)
raise StandardError
update_rnkWORD(options["table"], options["modified_words"])
task_sleep_now_if_required(can_stop_too=True)
except StandardError, e:
register_exception(alert_admin=True)
write_message("Exception caught: %s" % e, sys.stderr)
sys.exit(1)
wordTable.report_on_table_consistency()
# We are done. State it in the database, close and quit
return 1
def get_tags(config):
    """Get the tags that should be used creating the index and each tag's parameter.

    config - a ConfigParser holding the rank method configuration; the
    section named by [rank_method] function contains tag1, tag2, ... entries
    of the form "tag,parameter,language".
    Returns a list of [tag, int(parameter), language] triples.
    (The dead `if 1:` scaffolding and the commented-out try/except that used
    to wrap this loop have been removed.)
    """
    tags = []
    function = config.get("rank_method", "function")
    i = 1
    shown_error = 0
    while config.has_option(function, "tag%s" % i):
        tag = config.get(function, "tag%s" % i)
        tag = tag.split(",")
        tag[1] = int(tag[1].strip())
        tag[2] = tag[2].strip()
        #check if stemmer for language is available
        if config.get(function, "stemming") and stem("information", "en") != "inform":
            if shown_error == 0:
                write_message("Warning: Stemming not working. Please check it out!")
                shown_error = 1
        elif tag[2] and tag[2] != "none" and config.get(function, "stemming") and not is_stemmer_available_for_language(tag[2]):
            write_message("Warning: Stemming not available for language '%s'." % tag[2])
        tags.append(tag)
        i += 1
    return tags
def get_valid_range(rank_method_code):
    """Return the set of record IDs that are valid for this rank method.

    Collection-based filtering (looking up which collections enabled the
    method and searching them) is currently disabled, so the returned
    intbitset accepts every possible recID except 0.
    """
    valid_recids = intbitset(trailing_bits=1)
    valid_recids.discard(0)
    return valid_recids
def check_term(term, termlength):
    """Check if term contains not allowed characters, or for any other reasons for not using this term.

    term - candidate index term
    termlength - minimum length; terms of this length or shorter are rejected
    Returns False for too-short terms or terms containing digits/punctuation;
    True otherwise (including when int() fails on the cleaned term, i.e. the
    term is genuinely non-numeric).
    """
    try:
        if len(term) <= termlength:
            return False
        reg = re.compile(r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]")
        if re.search(reg, term):
            return False
        # Strip separators, then reject anything that still parses as a
        # non-zero number.
        term = term.replace("-", "").replace(".", "").replace(",", "")
        if int(term):
            return False
    except (ValueError, TypeError):
        # Narrowed from StandardError: int()/replace() failing just means the
        # term is not numeric, which is the normal "keep it" case.
        pass
    return True
def check_rnkWORD(table):
    """Checks for any problems in rnkWORD tables.

    table - name of the forward index (e.g. 'rnkWORD01F'); the reverse index
    name is derived by dropping the trailing character and appending 'R'.
    Flags every term whose stored hitlist is missing its 'Gi' entry (or has a
    zero Gi weight), then scans the reverse index for records holding terms
    with zero weights.  The collected errors are stored in the module-global
    options["modified_words"] so a later update run can repair them.
    """
    i = 0
    errors = {}  # term -> 1 for each term found with a missing/zero Gi value
    termslist = run_sql("SELECT term FROM %s" % table)
    # N is max(id_bibrec) (not a row count); it only bounds the reverse scan.
    N = run_sql("select max(id_bibrec) from %sR" % table[:-1])[0][0]
    write_message("Checking integrity of rank values in %s" % table)
    terms = map(lambda x: x[0], termslist)
    while i < len(terms):
        # Batch 5000 terms at a time into a single IN (...) query.
        query_params = ()
        for j in range(i, ((i+5000)< len(terms) and (i+5000) or len(terms))):
            query_params += (terms[j],)
        terms_docs = run_sql("SELECT term, hitlist FROM %s WHERE term IN (%s)" % (table, (len(query_params)*"%s,")[:-1]),
                 query_params)
        for (t, hitlist) in terms_docs:
            term_docs = deserialize_via_marshal(hitlist)
            # Broken when the 'Gi' marker is absent or its weight is 0.
            if (term_docs.has_key("Gi") and term_docs["Gi"][1] == 0) or not term_docs.has_key("Gi"):
                write_message("ERROR: Missing value for term: %s (%s) in %s: %s" % (t, repr(t), table, len(term_docs)))
                errors[t] = 1
        i += 5000
    write_message("Checking integrity of rank values in %sR" % table[:-1])
    i = 0
    while i < N:
        docs_terms = run_sql("SELECT id_bibrec, termlist FROM %sR WHERE id_bibrec>=%s and id_bibrec<=%s" % (table[:-1], i, i+5000))
        for (j, termlist) in docs_terms:
            termlist = deserialize_via_marshal(termlist)
            for (t, tf) in termlist.iteritems():
                # tf is (term-frequency, weight); weight 0 means the term was
                # never post-processed for this record.
                if tf[1] == 0 and not errors.has_key(t):
                    errors[t] = 1
                    write_message("ERROR: Gi missing for record %s and term: %s (%s) in %s" % (j,t,repr(t), table))
                    terms_docs = run_sql("SELECT term, hitlist FROM %s WHERE term=%%s" % table, (t,))
                    # NOTE(review): this rebinds `termlist` to the forward-index
                    # entry but the value is never used afterwards -- looks like
                    # leftover debugging code; confirm before removing.
                    termlist = deserialize_via_marshal(terms_docs[0][1])
        i += 5000
    if len(errors) == 0:
        write_message("No direct errors found, but nonconsistent data may exist.")
    else:
        write_message("%s errors found during integrity check, repair and rebalancing recommended." % len(errors))
    options["modified_words"] = errors
def rank_method_code_statistics(table):
    """Shows some statistics about this rank method.

    table - name of the forward index to analyse.
    Prints, side by side, the least used terms (occurrence-count histogram),
    the most important terms and the least important terms (by stored Gi).
    """
    maxID = run_sql("select max(id) from %s" % table)
    maxID = maxID[0][0]
    terms = {}  # number-of-docs-per-term -> how many terms have that count
    Gi = {}     # term -> stored Gi entry
    write_message("Showing statistics of terms in index:")
    write_message("Important: For the 'Least used terms', the number of terms is shown first, and the number of occurences second.")
    write_message("Least used terms---Most important terms---Least important terms")
    i = 0
    while i < maxID:
        terms_docs = run_sql("SELECT term, hitlist FROM %s WHERE id>= %s and id < %s" % (table, i, i + 10000))
        for (t, hitlist) in terms_docs:
            term_docs = deserialize_via_marshal(hitlist)
            terms[len(term_docs)] = terms.get(len(term_docs), 0) + 1
            if term_docs.has_key("Gi"):
                Gi[t] = term_docs["Gi"]
        i = i + 10000
    terms = terms.items()
    terms.sort(lambda x, y: cmp(y[1], x[1]))
    Gi = Gi.items()
    Gi.sort(lambda x, y: cmp(y[1], x[1]))
    # Show at most 20 rows; the unconditional range(0, 20) used to raise
    # IndexError on indexes with fewer than 20 distinct terms.
    for i in range(min(20, len(terms), len(Gi))):
        write_message("%s/%s---%s---%s" % (terms[i][0], terms[i][1], Gi[i][0], Gi[len(Gi) - i - 1][0]))
def update_rnkWORD(table, terms):
    """Updates rnkWORDF and rnkWORDR with Gi and Nj values. For each term in rnkWORDF, a Gi value for the term is added. And for each term in each document, the Nj value for that document is added. In rnkWORDR, the Gi value for each term in each document is added. For description on how things are computed, look in the hacking docs.
    table - name of forward index to update
    terms - modified terms"""
    stime = time.time()
    Gi = {}  # term -> importance weight of the term
    Nj = {}  # recID -> normalization value for the record
    N = run_sql("select count(id_bibrec) from %sR" % table[:-1])[0][0]
    if len(terms) == 0 and task_get_option("quick") == "yes":
        write_message("No terms to process, ending...")
        return ""
    elif task_get_option("quick") == "yes": #not used -R option, fast calculation (not accurate)
        write_message("Beginning post-processing of %s terms" % len(terms))
        #Locating all documents related to the modified/new/deleted terms, if fast update,
        #only take into account new/modified occurences
        write_message("Phase 1: Finding records containing modified terms")
        terms = terms.keys()
        i = 0
        while i < len(terms):
            # 5000-term batches; get_from_forward_index clamps the upper bound.
            terms_docs = get_from_forward_index(terms, i, (i+5000), table)
            for (t, hitlist) in terms_docs:
                term_docs = deserialize_via_marshal(hitlist)
                if term_docs.has_key("Gi"):
                    del term_docs["Gi"]
                for (j, tf) in term_docs.iteritems():
                    # tf[1] == 0 marks an occurrence not yet post-processed.
                    if (task_get_option("quick") == "yes" and tf[1] == 0) or task_get_option("quick") == "no":
                        Nj[j] = 0
            write_message("Phase 1: ......processed %s/%s terms" % ((i+5000>len(terms) and len(terms) or (i+5000)), len(terms)))
            i += 5000
        write_message("Phase 1: Finished finding records containing modified terms")
        #Find all terms in the records found in last phase
        write_message("Phase 2: Finding all terms in affected records")
        records = Nj.keys()
        i = 0
        while i < len(records):
            docs_terms = get_from_reverse_index(records, i, (i + 5000), table)
            for (j, termlist) in docs_terms:
                doc_terms = deserialize_via_marshal(termlist)
                for (t, tf) in doc_terms.iteritems():
                    Gi[t] = 0
            write_message("Phase 2: ......processed %s/%s records " % ((i+5000>len(records) and len(records) or (i+5000)), len(records)))
            i += 5000
        write_message("Phase 2: Finished finding all terms in affected records")
    else: #recalculate
        max_id = run_sql("SELECT MAX(id) FROM %s" % table)
        max_id = max_id[0][0]
        write_message("Beginning recalculation of %s terms" % max_id)
        terms = []
        i = 0
        while i < max_id:
            terms_docs = get_from_forward_index_with_id(i, (i+5000), table)
            for (t, hitlist) in terms_docs:
                Gi[t] = 0
                term_docs = deserialize_via_marshal(hitlist)
                if term_docs.has_key("Gi"):
                    del term_docs["Gi"]
                for (j, tf) in term_docs.iteritems():
                    Nj[j] = 0
            write_message("Phase 1: ......processed %s/%s terms" % ((i+5000)>max_id and max_id or (i+5000), max_id))
            i += 5000
        write_message("Phase 1: Finished finding which records contains which terms")
        write_message("Phase 2: Jumping over..already done in phase 1 because of -R option")
    # From here on `terms` is the full set of affected terms collected into Gi
    # during phases 1-2; Gi itself is rebuilt with real values in phase 3.
    terms = Gi.keys()
    Gi = {}
    i = 0
    if task_get_option("quick") == "no":
        #Calculating Fi and Gi value for each term
        write_message("Phase 3: Calculating importance of all affected terms")
        while i < len(terms):
            terms_docs = get_from_forward_index(terms, i, (i+5000), table)
            for (t, hitlist) in terms_docs:
                term_docs = deserialize_via_marshal(hitlist)
                if term_docs.has_key("Gi"):
                    del term_docs["Gi"]
                Fi = 0  # total frequency of the term over all its records
                Gi[t] = 1
                for (j, tf) in term_docs.iteritems():
                    Fi += tf[0]
                # Inverse-entropy weighting: terms spread evenly over many
                # records get a low Gi, concentrated terms a high one.
                for (j, tf) in term_docs.iteritems():
                    if tf[0] != Fi:
                        Gi[t] = Gi[t] + ((float(tf[0]) / Fi) * math.log(float(tf[0]) / Fi) / math.log(2)) / math.log(N)
            write_message("Phase 3: ......processed %s/%s terms" % ((i+5000>len(terms) and len(terms) or (i+5000)), len(terms)))
            i += 5000
        write_message("Phase 3: Finished calculating importance of all affected terms")
    else:
        #Using existing Gi value instead of calculating a new one. Missing some accurancy.
        write_message("Phase 3: Getting approximate importance of all affected terms")
        while i < len(terms):
            terms_docs = get_from_forward_index(terms, i, (i+5000), table)
            for (t, hitlist) in terms_docs:
                term_docs = deserialize_via_marshal(hitlist)
                if term_docs.has_key("Gi"):
                    Gi[t] = term_docs["Gi"][1]
                elif len(term_docs) == 1:
                    Gi[t] = 1
                else:
                    # No stored value: fall back to the full calculation for
                    # this term (same formula as the exact branch above).
                    Fi = 0
                    Gi[t] = 1
                    for (j, tf) in term_docs.iteritems():
                        Fi += tf[0]
                    for (j, tf) in term_docs.iteritems():
                        if tf[0] != Fi:
                            Gi[t] = Gi[t] + ((float(tf[0]) / Fi) * math.log(float(tf[0]) / Fi) / math.log(2)) / math.log(N)
            write_message("Phase 3: ......processed %s/%s terms" % ((i+5000>len(terms) and len(terms) or (i+5000)), len(terms)))
            i += 5000
        write_message("Phase 3: Finished getting approximate importance of all affected terms")
    write_message("Phase 4: Calculating normalization value for all affected records and updating %sR" % table[:-1])
    records = Nj.keys()
    i = 0
    while i < len(records):
        #Calculating the normalization value for each document, and adding the Gi value to each term in each document.
        docs_terms = get_from_reverse_index(records, i, (i + 5000), table)
        for (j, termlist) in docs_terms:
            doc_terms = deserialize_via_marshal(termlist)
            try:
                for (t, tf) in doc_terms.iteritems():
                    if Gi.has_key(t):
                        Nj[j] = Nj.get(j, 0) + math.pow(Gi[t] * (1 + math.log(tf[0])), 2)
                        # Gi is stored as an int scaled by 100, shifted by 1 so
                        # that a stored 0 presumably keeps meaning "missing" --
                        # confirm against the hacking docs.
                        Git = int(math.floor(Gi[t]*100))
                        if Git >= 0:
                            Git += 1
                        doc_terms[t] = (tf[0], Git)
                    else:
                        # Term not recomputed this run: reuse its stored weight.
                        Nj[j] = Nj.get(j, 0) + math.pow(tf[1] * (1 + math.log(tf[0])), 2)
                Nj[j] = 1.0 / math.sqrt(Nj[j])
                Nj[j] = int(Nj[j] * 100)
                if Nj[j] >= 0:
                    Nj[j] += 1
                run_sql("UPDATE %sR SET termlist=%%s WHERE id_bibrec=%%s" % table[:-1],
                        (serialize_via_marshal(doc_terms), j))
            except (ZeroDivisionError, OverflowError), e:
                ## This is to try to isolate division by zero errors.
                register_exception(prefix="Error when analysing the record %s (%s): %s\n" % (j, repr(docs_terms), e), alert_admin=True)
        write_message("Phase 4: ......processed %s/%s records" % ((i+5000>len(records) and len(records) or (i+5000)), len(records)))
        i += 5000
    write_message("Phase 4: Finished calculating normalization value for all affected records and updating %sR" % table[:-1])
    write_message("Phase 5: Updating %s with new normalization values" % table)
    i = 0
    terms = Gi.keys()
    while i < len(terms):
        #Adding the Gi value to each term, and adding the normalization value to each term in each document.
        terms_docs = get_from_forward_index(terms, i, (i+5000), table)
        for (t, hitlist) in terms_docs:
            try:
                term_docs = deserialize_via_marshal(hitlist)
                if term_docs.has_key("Gi"):
                    del term_docs["Gi"]
                for (j, tf) in term_docs.iteritems():
                    if Nj.has_key(j):
                        term_docs[j] = (tf[0], Nj[j])
                Git = int(math.floor(Gi[t]*100))
                if Git >= 0:
                    Git += 1
                # The term's own weight rides in the hitlist under the special
                # "Gi" key, alongside the per-record entries.
                term_docs["Gi"] = (0, Git)
                run_sql("UPDATE %s SET hitlist=%%s WHERE term=%%s" % table,
                        (serialize_via_marshal(term_docs), t))
            except (ZeroDivisionError, OverflowError), e:
                register_exception(prefix="Error when analysing the term %s (%s): %s\n" % (t, repr(terms_docs), e), alert_admin=True)
        write_message("Phase 5: ......processed %s/%s terms" % ((i+5000>len(terms) and len(terms) or (i+5000)), len(terms)))
        i += 5000
    write_message("Phase 5: Finished updating %s with new normalization values" % table)
    write_message("Time used for post-processing: %.1fmin" % ((time.time() - stime) / 60))
    write_message("Finished post-processing")
def get_from_forward_index(terms, start, stop, table):
    """Collect (term, hitlist) rows from the forward index `table` for
    terms[start:stop] (upper bound clamped to the list length), issuing
    one parameterized query per term."""
    upper = (stop < len(terms) and stop or len(terms))
    query = "SELECT term, hitlist FROM %s WHERE term=%%s" % table
    gathered = ()
    for pos in range(start, upper):
        gathered += run_sql(query, (terms[pos],))
    return gathered
def get_from_forward_index_with_id(start, stop, table):
    """Return the (term, hitlist) rows of the forward index `table` whose
    numeric row id lies in the inclusive range [start, stop]."""
    query = "SELECT term, hitlist FROM %s WHERE id BETWEEN %s AND %s" % (table, start, stop)
    return run_sql(query)
def get_from_reverse_index(records, start, stop, table):
    """Return the (id_bibrec, termlist) rows of the reverse index for the
    record IDs in records[start:stop], fetched with a single IN (...) query."""
    # repr() reproduces exactly what "%s" % <list-slice> used to emit for
    # each element (matters for Python 2 longs).
    id_list = ", ".join([repr(recid) for recid in records[start:stop]])
    query = "SELECT id_bibrec, termlist FROM %sR WHERE id_bibrec IN (%s)" % (table[:-1], id_list)
    return run_sql(query)
#def test_word_separators(phrase="hep-th/0101001"):
#"""Tests word separating policy on various input."""
#print "%s:" % phrase
#gwfp = get_words_from_phrase(phrase)
#for (word, count) in gwfp.iteritems():
#print "\t-> %s - %s" % (word, count)
def getName(methname, ln=CFG_SITE_LANG, type='ln'):
    """Returns the name of the rank method, either in default language or given language.
    methname = short name of the method
    ln - the language to get the name in
    type - which name "type" to get.

    Falls back to the CFG_SITE_LANG name, then to methname itself, when no
    localized name is stored.  Raises Exception when the method is unknown.
    """
    try:
        # Parameterized queries: methname/ln/type originate from CLI/config
        # input and were previously interpolated straight into the SQL string.
        rnkid = run_sql("SELECT id FROM rnkMETHOD where name=%s", (methname,))
        if rnkid:
            rnkid = str(rnkid[0][0])
            res = run_sql("SELECT value FROM rnkMETHODNAME where type=%s and ln=%s and id_rnkMETHOD=%s",
                          (type, ln, rnkid))
            if not res:
                res = run_sql("SELECT value FROM rnkMETHODNAME WHERE ln=%s and id_rnkMETHOD=%s and type=%s",
                              (CFG_SITE_LANG, rnkid, type))
            if not res:
                return methname
            return res[0][0]
        else:
            raise Exception
    except Exception:
        write_message("Cannot run rank method, either given code for method is wrong, or it has not been added using the webinterface.")
        raise Exception
def word_similarity(run):
    """Entry point for the word-similarity rank method: delegates to word_index()."""
    return word_index(run)
diff --git a/invenio/legacy/bibsched/bibtask.py b/invenio/legacy/bibsched/bibtask.py
index 985df75c0..64a04bc38 100644
--- a/invenio/legacy/bibsched/bibtask.py
+++ b/invenio/legacy/bibsched/bibtask.py
@@ -1,1186 +1,1186 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Invenio Bibliographic Task Class.
BibTask class.
A BibTask is an executable under CFG_BINDIR, whose name is stored in
bibtask_config.CFG_BIBTASK_VALID_TASKS.
A valid task must call the task_init function with the proper parameters.
Generic task related parameters (user, sleeptime, runtime, task_id, task_name
verbose)
go to _TASK_PARAMS global dictionary accessible through task_get_task_param.
Option specific to the particular BibTask go to _OPTIONS global dictionary
and are accessible via task_get_option/task_set_option.
In order to log something properly, just use write_message(s) with the desired
verbose level.
task_update_status and task_update_progress can be used to update the status
of the task (DONE, FAILED, DONE WITH ERRORS...) and it's progress
(1 out 100..) within the bibsched monitor.
It is possible to enqueue a BibTask via API call by means of
task_low_level_submission.
"""
__revision__ = "$Id$"
import getopt
import getpass
import marshal
import os
import pwd
import re
import signal
import sys
import time
import datetime
import traceback
import logging
import logging.handlers
import random
from socket import gethostname
from invenio.dbquery import run_sql, _db_login
from invenio.access_control_engine import acc_authorize_action
from invenio.config import CFG_PREFIX, CFG_BINDIR, CFG_LOGDIR, \
CFG_BIBSCHED_PROCESS_USER, CFG_TMPDIR, CFG_SITE_SUPPORT_EMAIL
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO, \
CFG_EXTERNAL_AUTHENTICATION
from invenio.webuser import get_user_preferences, get_email
from invenio.bibtask_config import CFG_BIBTASK_VALID_TASKS, \
CFG_BIBTASK_DEFAULT_TASK_SETTINGS, CFG_BIBTASK_FIXEDTIMETASKS
from invenio.utils.date import parse_runtime_limit
from invenio.shellutils import escape_shell_arg
from invenio.ext.email import send_email
from invenio.bibsched import bibsched_set_host, \
bibsched_get_host
# Global _TASK_PARAMS dictionary.
_TASK_PARAMS = {
'version': '',
'task_stop_helper_fnc': None,
'task_name': os.path.basename(sys.argv[0]),
'task_specific_name': '',
'task_id': 0,
'user': '',
# If the task is not initialized (usually a developer debugging
# a single method), output all messages.
'verbose': 9,
'sleeptime': '',
'runtime': time.strftime("%Y-%m-%d %H:%M:%S"),
'priority': 0,
'runtime_limit': None,
'profile': [],
'post-process': [],
'sequence-id':None,
'stop_queue_on_error': False,
'fixed_time': False,
'email_logs_to': [],
}
# Global _OPTIONS dictionary.
_OPTIONS = {}
# Which tasks don't need to ask the user for authorization?
CFG_VALID_PROCESSES_NO_AUTH_NEEDED = ("bibupload", )
CFG_TASK_IS_NOT_A_DEAMON = ("bibupload", )
def fix_argv_paths(paths, argv=None):
    """Given the argv vector of cli parameters, and a list of path that
    can be relative and may have been specified within argv,
    it substitute all the occurencies of these paths in argv.
    argv is changed in place and returned.

    @param paths: paths that may appear verbatim as argv entries
    @param argv: the argument vector; defaults to sys.argv
    @return: argv, with every matching entry replaced by its absolute path
    """
    if argv is None:
        argv = sys.argv
    for path in paths:
        # enumerate replaces the old xrange(len(argv)) index loop: same
        # in-place substitution, clearer, and portable across Python versions.
        for count, arg in enumerate(argv):
            if path == arg:
                argv[count] = os.path.abspath(path)
    return argv
def task_low_level_submission(name, user, *argv):
"""Let special lowlevel enqueuing of a task on the bibsche queue.
@param name: is the name of the bibtask. It must be a valid executable under
C{CFG_BINDIR}.
@type name: string
@param user: is a string that will appear as the "user" submitting the task.
Since task are submitted via API it make sense to set the
user to the name of the module/function that called
task_low_level_submission.
@type user: string
@param argv: are all the additional CLI parameters that would have been
passed on the CLI (one parameter per variable).
e.g.:
>>> task_low_level_submission('bibupload', 'admin', '-a', '/tmp/z.xml')
@type: strings
@return: the task identifier when the task is correctly enqueued.
@rtype: int
@note: use absolute paths in argv
"""
def get_priority(argv):
"""Try to get the priority by analysing the arguments."""
priority = 0
argv = list(argv)
while True:
try:
opts, args = getopt.gnu_getopt(argv, 'P:', ['priority='])
except getopt.GetoptError, err:
## We remove one by one all the non recognized parameters
if len(err.opt) > 1:
argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
else:
argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
else:
break
for opt in opts:
if opt[0] in ('-P', '--priority'):
try:
priority = int(opt[1])
except ValueError:
pass
return priority
def get_special_name(argv):
"""Try to get the special name by analysing the arguments."""
special_name = ''
argv = list(argv)
while True:
try:
opts, args = getopt.gnu_getopt(argv, 'N:', ['name='])
except getopt.GetoptError, err:
## We remove one by one all the non recognized parameters
if len(err.opt) > 1:
argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
else:
argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
else:
break
for opt in opts:
if opt[0] in ('-N', '--name'):
special_name = opt[1]
return special_name
def get_runtime(argv):
"""Try to get the runtime by analysing the arguments."""
runtime = time.strftime("%Y-%m-%d %H:%M:%S")
argv = list(argv)
while True:
try:
opts, args = getopt.gnu_getopt(argv, 't:', ['runtime='])
except getopt.GetoptError, err:
## We remove one by one all the non recognized parameters
if len(err.opt) > 1:
argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
else:
argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
else:
break
for opt in opts:
if opt[0] in ('-t', '--runtime'):
try:
runtime = get_datetime(opt[1])
except ValueError:
pass
return runtime
def get_sleeptime(argv):
"""Try to get the runtime by analysing the arguments."""
sleeptime = ""
argv = list(argv)
while True:
try:
opts, args = getopt.gnu_getopt(argv, 's:', ['sleeptime='])
except getopt.GetoptError, err:
## We remove one by one all the non recognized parameters
if len(err.opt) > 1:
argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
else:
argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
else:
break
for opt in opts:
if opt[0] in ('-s', '--sleeptime'):
try:
sleeptime = opt[1]
except ValueError:
pass
return sleeptime
def get_sequenceid(argv):
"""Try to get the sequenceid by analysing the arguments."""
sequenceid = None
argv = list(argv)
while True:
try:
opts, args = getopt.gnu_getopt(argv, 'I:', ['sequence-id='])
except getopt.GetoptError, err:
## We remove one by one all the non recognized parameters
if len(err.opt) > 1:
argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
else:
argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
else:
break
for opt in opts:
if opt[0] in ('-I', '--sequence-id'):
try:
sequenceid = opt[1]
except ValueError:
pass
return sequenceid
task_id = None
try:
if not name in CFG_BIBTASK_VALID_TASKS:
raise StandardError('%s is not a valid task name' % name)
new_argv = []
for arg in argv:
if isinstance(arg, unicode):
arg = arg.encode('utf8')
new_argv.append(arg)
argv = new_argv
priority = get_priority(argv)
special_name = get_special_name(argv)
runtime = get_runtime(argv)
sleeptime = get_sleeptime(argv)
sequenceid = get_sequenceid(argv)
argv = tuple([os.path.join(CFG_BINDIR, name)] + list(argv))
if special_name:
name = '%s:%s' % (name, special_name)
verbose_argv = 'Will execute: %s' % ' '.join([escape_shell_arg(str(arg)) for arg in argv])
## submit task:
task_id = run_sql("""INSERT INTO schTASK (proc,user,
runtime,sleeptime,status,progress,arguments,priority,sequenceid)
VALUES (%s,%s,%s,%s,'WAITING',%s,%s,%s,%s)""",
(name, user, runtime, sleeptime, verbose_argv[:254], marshal.dumps(argv), priority, sequenceid))
except Exception:
register_exception(alert_admin=True)
if task_id:
run_sql("""DELETE FROM schTASK WHERE id=%s""", (task_id, ))
raise
return task_id
def bibtask_allocate_sequenceid(curdir=None):
    """
    Returns an almost unique number to be used a task sequence ID.

    In WebSubmit functions, set C{curdir} to the curdir (!) to read
    the shared sequence ID for all functions of this submission (reading
    "access number").

    @param curdir: in WebSubmit functions (ONLY) the value retrieved
        from the curdir parameter of the function
    @return: an integer for the sequence ID. 0 is returned if the
        sequence ID could not be allocated
    @rtype: int

    @note: in the curdir case the value actually returned is a string of up
        to 9 digits taken from the "access" file -- confirm whether callers
        rely on that before tightening the contract.
    """
    if curdir:
        try:
            # open() instead of the Python-2-only file(); the handle is now
            # closed even when readline() fails.
            fd = open(os.path.join(curdir, 'access'), "r")
            try:
                access = fd.readline().strip()
            finally:
                fd.close()
            return access.replace("_", "")[-9:]
        except Exception:
            # Best effort: any problem reading the access file means "no
            # sequence ID".  Narrowed from a bare except so SystemExit and
            # KeyboardInterrupt still propagate.
            return 0
    else:
        return random.randrange(1, 4294967296)
def setup_loggers(task_id=None):
    """Sets up the logging system.

    Resets the root logger's handlers, then attaches stdout (DEBUG+) and
    stderr (WARNING+) stream handlers; when task_id is not None, also
    attaches rotating .log/.err file handlers under CFG_LOGDIR.
    Returns the configured root logger.
    """
    logger = logging.getLogger()
    ## Let's clean the handlers in case some piece of code has already
    ## fired any write_message, i.e. any call to debug, info, etc.
    ## which triggered a call to logging.basicConfig()
    ## BUGFIX: iterate over a copy -- removing from the very list being
    ## iterated used to skip every other handler.
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
    formatter = logging.Formatter('%(asctime)s --> %(message)s', '%Y-%m-%d %H:%M:%S')
    if task_id is not None:
        # NOTE(review): file names use _TASK_PARAMS['task_id'] rather than the
        # task_id argument -- presumably always equal; confirm before unifying.
        err_logger = logging.handlers.RotatingFileHandler(os.path.join(CFG_LOGDIR, 'bibsched_task_%d.err' % _TASK_PARAMS['task_id']), 'a', 1*1024*1024, 10)
        log_logger = logging.handlers.RotatingFileHandler(os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % _TASK_PARAMS['task_id']), 'a', 1*1024*1024, 10)
        log_logger.setFormatter(formatter)
        log_logger.setLevel(logging.DEBUG)
        err_logger.setFormatter(formatter)
        err_logger.setLevel(logging.WARNING)
        logger.addHandler(err_logger)
        logger.addHandler(log_logger)
    stdout_logger = logging.StreamHandler(sys.stdout)
    stdout_logger.setFormatter(formatter)
    stdout_logger.setLevel(logging.DEBUG)
    stderr_logger = logging.StreamHandler(sys.stderr)
    stderr_logger.setFormatter(formatter)
    stderr_logger.setLevel(logging.WARNING)
    logger.addHandler(stderr_logger)
    logger.addHandler(stdout_logger)
    logger.setLevel(logging.INFO)
    return logger
def task_init(
authorization_action="",
authorization_msg="",
description="",
help_specific_usage="",
version=__revision__,
specific_params=("", []),
task_stop_helper_fnc=None,
task_submit_elaborate_specific_parameter_fnc=None,
task_submit_check_options_fnc=None,
task_run_fnc=None):
""" Initialize a BibTask.
@param authorization_action: is the name of the authorization action
connected with this task;
@param authorization_msg: is the header printed when asking for an
authorization password;
@param description: is the generic description printed in the usage page;
@param help_specific_usage: is the specific parameter help
@param task_stop_fnc: is a function that will be called
whenever the task is stopped
@param task_submit_elaborate_specific_parameter_fnc: will be called passing
a key and a value, for parsing specific cli parameters. Must return True if
it has recognized the parameter. Must eventually update the options with
bibtask_set_option;
@param task_submit_check_options: must check the validity of options (via
bibtask_get_option) once all the options where parsed;
@param task_run_fnc: will be called as the main core function. Must return
False in case of errors.
"""
global _TASK_PARAMS, _OPTIONS
_TASK_PARAMS = {
"version" : version,
"task_stop_helper_fnc" : task_stop_helper_fnc,
"task_name" : os.path.basename(sys.argv[0]),
"task_specific_name" : '',
"user" : '',
"verbose" : 1,
"sleeptime" : '',
"runtime" : time.strftime("%Y-%m-%d %H:%M:%S"),
"priority" : 0,
"runtime_limit" : None,
"profile" : [],
"post-process": [],
"sequence-id": None,
"stop_queue_on_error": False,
"fixed_time": False,
}
to_be_submitted = True
if len(sys.argv) == 2 and sys.argv[1].isdigit():
_TASK_PARAMS['task_id'] = int(sys.argv[1])
argv = _task_get_options(_TASK_PARAMS['task_id'], _TASK_PARAMS['task_name'])
to_be_submitted = False
else:
argv = sys.argv
setup_loggers(_TASK_PARAMS.get('task_id'))
task_name = os.path.basename(sys.argv[0])
if task_name not in CFG_BIBTASK_VALID_TASKS or os.path.realpath(os.path.join(CFG_BINDIR, task_name)) != os.path.realpath(sys.argv[0]):
raise OSError("%s is not in the allowed modules" % sys.argv[0])
- from invenio.errorlib import wrap_warn
+ from invenio.ext.logging import wrap_warn
wrap_warn()
if type(argv) is dict:
# FIXME: REMOVE AFTER MAJOR RELEASE 1.0
# This is needed for old task submitted before CLI parameters
# where stored in DB and _OPTIONS dictionary was stored instead.
_OPTIONS = argv
else:
try:
_task_build_params(_TASK_PARAMS['task_name'], argv, description,
help_specific_usage, version, specific_params,
task_submit_elaborate_specific_parameter_fnc,
task_submit_check_options_fnc)
except (SystemExit, Exception), err:
if not to_be_submitted:
register_exception(alert_admin=True)
write_message("Error in parsing the parameters: %s." % err, sys.stderr)
write_message("Exiting.", sys.stderr)
task_update_status("ERROR")
raise
write_message('argv=%s' % (argv, ), verbose=9)
write_message('_OPTIONS=%s' % (_OPTIONS, ), verbose=9)
write_message('_TASK_PARAMS=%s' % (_TASK_PARAMS, ), verbose=9)
if to_be_submitted:
_task_submit(argv, authorization_action, authorization_msg)
else:
try:
try:
if task_get_task_param('profile'):
try:
from cStringIO import StringIO
import pstats
filename = os.path.join(CFG_TMPDIR, 'bibsched_task_%s.pyprof' % _TASK_PARAMS['task_id'])
existing_sorts = pstats.Stats.sort_arg_dict_default.keys()
required_sorts = []
profile_dump = []
for sort in task_get_task_param('profile'):
if sort not in existing_sorts:
sort = 'cumulative'
if sort not in required_sorts:
required_sorts.append(sort)
if sys.hexversion < 0x02050000:
import hotshot
import hotshot.stats
pr = hotshot.Profile(filename)
ret = pr.runcall(_task_run, task_run_fnc)
for sort_type in required_sorts:
tmp_out = sys.stdout
sys.stdout = StringIO()
hotshot.stats.load(filename).strip_dirs().sort_stats(sort_type).print_stats()
# pylint: disable=E1103
# This is a hack. sys.stdout is a StringIO in this case.
profile_dump.append(sys.stdout.getvalue())
# pylint: enable=E1103
sys.stdout = tmp_out
else:
import cProfile
pr = cProfile.Profile()
ret = pr.runcall(_task_run, task_run_fnc)
pr.dump_stats(filename)
for sort_type in required_sorts:
strstream = StringIO()
pstats.Stats(filename, stream=strstream).strip_dirs().sort_stats(sort_type).print_stats()
profile_dump.append(strstream.getvalue())
profile_dump = '\n'.join(profile_dump)
profile_dump += '\nYou can use profile=%s' % existing_sorts
open(os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % _TASK_PARAMS['task_id']), 'a').write("%s" % profile_dump)
os.remove(filename)
except ImportError:
ret = _task_run(task_run_fnc)
write_message("ERROR: The Python Profiler is not installed!", stream=sys.stderr)
else:
ret = _task_run(task_run_fnc)
if not ret:
write_message("Error occurred. Exiting.", sys.stderr)
except Exception, e:
register_exception(alert_admin=True)
write_message("Unexpected error occurred: %s." % e, sys.stderr)
write_message("Traceback is:", sys.stderr)
write_messages(''.join(traceback.format_tb(sys.exc_info()[2])), sys.stderr)
write_message("Exiting.", sys.stderr)
task_update_status("ERROR")
finally:
_task_email_logs()
logging.shutdown()
def _task_build_params(
    task_name,
    argv,
    description="",
    help_specific_usage="",
    version=__revision__,
    specific_params=("", []),
    task_submit_elaborate_specific_parameter_fnc=None,
    task_submit_check_options_fnc=None):
    """Build the BibTask params from the command line.

    Parses *argv* with getopt and fills the module-level _TASK_PARAMS and
    _OPTIONS dictionaries.  Exits via _usage() on any parsing error.

    @param argv: a list of string as in sys.argv
    @param description: is the generic description printed in the usage page;
    @param help_specific_usage: is the specific parameter help
    @param specific_params: (short_opts, long_opts) getopt pair holding the
        task-specific switches, appended to the common scheduling switches.
    @param task_submit_elaborate_specific_parameter_fnc: will be called passing
        a key and a value, for parsing specific cli parameters. Must return
        True if it has recognized the parameter. Must eventually update the
        options with bibtask_set_option;
    @param task_submit_check_options_fnc: must check the validity of options
        (via bibtask_get_option) once all the options were parsed;
    """
    global _OPTIONS
    _OPTIONS = {}

    # Seed the options with per-task defaults, when configured.
    if task_name in CFG_BIBTASK_DEFAULT_TASK_SETTINGS:
        _OPTIONS.update(CFG_BIBTASK_DEFAULT_TASK_SETTINGS[task_name])

    # set user-defined options:
    try:
        (short_params, long_params) = specific_params
        # Common scheduling switches first, then the task-specific ones.
        opts, args = getopt.gnu_getopt(argv[1:], "hVv:u:s:t:P:N:L:I:" +
            short_params, [
                "help",
                "version",
                "verbose=",
                "user=",
                "sleep=",
                "runtime=",
                "priority=",
                "name=",
                "limit=",
                "profile=",
                "post-process=",
                "sequence-id=",
                "stop-on-error",
                "continue-on-error",
                "fixed-time",
                "email-logs-to="
            ] + long_params)
    except getopt.GetoptError, err:
        _usage(1, err, help_specific_usage=help_specific_usage, description=description)
    try:
        for opt in opts:
            if opt[0] in ("-h", "--help"):
                _usage(0, help_specific_usage=help_specific_usage, description=description)
            elif opt[0] in ("-V", "--version"):
                print _TASK_PARAMS["version"]
                sys.exit(0)
            elif opt[0] in ("-u", "--user"):
                _TASK_PARAMS["user"] = opt[1]
            elif opt[0] in ("-v", "--verbose"):
                _TASK_PARAMS["verbose"] = int(opt[1])
            elif opt[0] in ("-s", "--sleeptime"):
                # Sleeptime only makes sense for daemon-style tasks.
                if task_name not in CFG_TASK_IS_NOT_A_DEAMON:
                    get_datetime(opt[1]) # see if it is a valid shift
                    _TASK_PARAMS["sleeptime"] = opt[1]
            elif opt[0] in ("-t", "--runtime"):
                _TASK_PARAMS["runtime"] = get_datetime(opt[1])
            elif opt[0] in ("-P", "--priority"):
                _TASK_PARAMS["priority"] = int(opt[1])
            elif opt[0] in ("-N", "--name"):
                _TASK_PARAMS["task_specific_name"] = opt[1]
            elif opt[0] in ("-L", "--limit"):
                _TASK_PARAMS["runtime_limit"] = parse_runtime_limit(opt[1])
            elif opt[0] in ("--profile", ):
                _TASK_PARAMS["profile"] += opt[1].split(',')
            elif opt[0] in ("--post-process", ):
                _TASK_PARAMS["post-process"] += [opt[1]]
            elif opt[0] in ("-I", "--sequence-id"):
                _TASK_PARAMS["sequence-id"] = opt[1]
            elif opt[0] in ("--stop-on-error", ):
                _TASK_PARAMS["stop_queue_on_error"] = True
            elif opt[0] in ("--continue-on-error", ):
                _TASK_PARAMS["stop_queue_on_error"] = False
            elif opt[0] in ("--fixed-time", ):
                _TASK_PARAMS["fixed_time"] = True
            elif opt[0] in ("--email-logs-to",):
                _TASK_PARAMS["email_logs_to"] = opt[1].split(',')
            # Unknown switch: give the task-specific parser a chance;
            # if it does not recognise it either, bail out with usage.
            elif not callable(task_submit_elaborate_specific_parameter_fnc) or \
                    not task_submit_elaborate_specific_parameter_fnc(opt[0],
                        opt[1], opts, args):
                _usage(1, help_specific_usage=help_specific_usage, description=description)
    except StandardError, e:
        # NOTE(review): the exception object is passed as *exitcode*; _usage
        # ends with sys.exit(e), which prints str(e) and exits with status 1.
        _usage(e, help_specific_usage=help_specific_usage, description=description)
    if callable(task_submit_check_options_fnc):
        if not task_submit_check_options_fnc():
            _usage(1, help_specific_usage=help_specific_usage, description=description)
def task_set_option(key, value):
    """Store *value* under *key* in the task's option dictionary."""
    global _OPTIONS
    try:
        options = _OPTIONS
    except NameError:
        # The module-level dictionary was never initialised: create it now.
        _OPTIONS = {key: value}
    else:
        options[key] = value
def task_get_option(key, default=None):
    """Return the value stored for *key* in the task options, else *default*."""
    try:
        options = _OPTIONS
    except NameError:
        # Options were never initialised (task_init not run yet).
        return default
    return options.get(key, default)
def task_has_option(key):
    """Return True when *key* is present in the task options dictionary."""
    try:
        # ``key in dict`` replaces ``dict.has_key`` (removed in Python 3,
        # discouraged since Python 2.2) with identical semantics.
        return key in _OPTIONS
    except NameError:
        # _OPTIONS has not been initialised yet.
        return False
def task_get_task_param(key, default=None):
    """Return the task-level parameter stored under *key*, else *default*."""
    try:
        params = _TASK_PARAMS
    except NameError:
        # Task parameters were never initialised.
        return default
    return params.get(key, default)
def task_set_task_param(key, value):
    """Store *value* under *key* among the task-level parameters."""
    global _TASK_PARAMS
    try:
        params = _TASK_PARAMS
    except NameError:
        # First assignment ever: bootstrap the dictionary.
        _TASK_PARAMS = {key: value}
    else:
        params[key] = value
def task_update_progress(msg):
    """Record *msg* as this task's progress in the schTASK table."""
    write_message("Updating task progress to %s." % msg, verbose=9)
    if "task_id" not in _TASK_PARAMS:
        # Not registered with the scheduler: nothing to persist.
        return None
    return run_sql("UPDATE schTASK SET progress=%s where id=%s",
                   (msg, _TASK_PARAMS["task_id"]))
def task_update_status(val):
    """Record *val* as this task's status in the schTASK table."""
    write_message("Updating task status to %s." % val, verbose=9)
    if "task_id" not in _TASK_PARAMS:
        # Not registered with the scheduler: nothing to persist.
        return None
    return run_sql("UPDATE schTASK SET status=%s where id=%s",
                   (val, _TASK_PARAMS["task_id"]))
def task_read_status():
    """Return the current status of this task from the schTASK table.

    Returns 'UNKNOWN' when no row exists for the task id.
    """
    res = run_sql("SELECT status FROM schTASK where id=%s",
                  (_TASK_PARAMS['task_id'],), 1)
    try:
        out = res[0][0]
    except (IndexError, TypeError):
        # No row for this task id (or unexpected result shape).  Narrowed
        # from a bare ``except:`` so that KeyboardInterrupt/SystemExit are
        # no longer silently swallowed here.
        out = 'UNKNOWN'
    return out
def write_messages(msgs, stream=None, verbose=1):
    """Emit every newline-separated line of *msgs* through write_message."""
    if stream is None:
        stream = sys.stdout
    for line in msgs.split('\n'):
        write_message(line, stream, verbose)
def write_message(msg, stream=None, verbose=1):
    """Log *msg* and flush the output stream (sys.stdout or sys.stderr).

    Useful for debugging stuff.
    @note: msg may be a callable taking no parameters; it is then invoked
    to obtain the actual string to print.
    """
    if stream is None:
        stream = sys.stdout
    # Note: ``msg`` is tested for truthiness before _TASK_PARAMS is touched,
    # so an empty message never requires the task to be initialised.
    if not (msg and _TASK_PARAMS['verbose'] >= verbose):
        logging.debug(msg)
        return
    if callable(msg):
        msg = msg()
    if stream == sys.stdout:
        logging.info(msg)
    elif stream == sys.stderr:
        logging.error(msg)
    else:
        sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream)
_RE_SHIFT = re.compile("([-\+]{0,1})([\d]+)([dhms])")
def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S", now=None):
"""Returns a date string according to the format string.
It can handle normal date strings and shifts with respect
to now."""
date = now or datetime.datetime.now()
factors = {"d": 24 * 3600, "h": 3600, "m": 60, "s": 1}
m = _RE_SHIFT.match(var)
if m:
sign = m.groups()[0] == "-" and -1 or 1
factor = factors[m.groups()[2]]
value = float(m.groups()[1])
delta = sign * factor * value
while delta > 0 and date < datetime.datetime.now():
date = date + datetime.timedelta(seconds=delta)
date = date.strftime(format_string)
else:
date = time.strptime(var, format_string)
date = time.strftime(format_string, date)
return date
def task_sleep_now_if_required(can_stop_too=False):
    """This function should be called during safe state of BibTask,
    e.g. after flushing caches or outside of run_sql calls.

    Reads the status bibsched wrote into schTASK and, when requested,
    suspends the process (SIGSTOP) until bibsched resumes it, or exits
    cleanly when *can_stop_too* is True and a stop was requested.
    """
    status = task_read_status()
    write_message('Entering task_sleep_now_if_required with status=%s' % status, verbose=9)
    if status == 'ABOUT TO SLEEP':
        write_message("sleeping...")
        task_update_status("SLEEPING")
        # Ignore further sleep requests while already suspended.
        signal.signal(signal.SIGTSTP, _task_sig_dumb)
        # Suspend ourselves; bibsched resumes us later with SIGCONT.
        os.kill(os.getpid(), signal.SIGSTOP)
        time.sleep(1)
        # Once resumed, bibsched may have turned the sleep into a stop.
        if task_read_status() == 'NOW STOP':
            if can_stop_too:
                write_message("stopped")
                task_update_status("STOPPED")
                sys.exit(0)
            else:
                write_message("stopping as soon as possible...")
                task_update_status('ABOUT TO STOP')
        else:
            write_message("... continuing...")
            task_update_status("CONTINUING")
        # Restore the normal sleep-signal handler.
        signal.signal(signal.SIGTSTP, _task_sig_sleep)
    elif status == 'ABOUT TO STOP':
        if can_stop_too:
            write_message("stopped")
            task_update_status("STOPPED")
            sys.exit(0)
        else:
            ## I am a capricious baby. At least I am going to sleep :-)
            write_message("sleeping...")
            task_update_status("SLEEPING")
            signal.signal(signal.SIGTSTP, _task_sig_dumb)
            os.kill(os.getpid(), signal.SIGSTOP)
            time.sleep(1)
            ## Putting back the status to "ABOUT TO STOP"
            write_message("... continuing...")
            task_update_status("ABOUT TO STOP")
            signal.signal(signal.SIGTSTP, _task_sig_sleep)
    if can_stop_too:
        # Also honour the configured runtime window: stop when outside it.
        runtime_limit = task_get_option("limit")
        if runtime_limit is not None:
            if not (runtime_limit[0] <= datetime.datetime.now() <= runtime_limit[1]):
                write_message("stopped (outside runtime limit)")
                task_update_status("STOPPED")
                sys.exit(0)
def authenticate(user, authorization_action, authorization_msg=""):
    """Authenticate the user against the user database.
    Check for its password, if it exists.
    Check for authorization_action access rights.
    Return user name upon authorization success,
    do system exit upon authorization failure.

    NOTE(review): the early ``return user`` below (marked FIXME) disables
    the whole check -- everything after it is currently dead code, kept
    presumably until the FIXME is resolved.
    """
    #FIXME
    return user
    # With SSO it's impossible to check for pwd
    if CFG_EXTERNAL_AUTH_USING_SSO or os.path.basename(sys.argv[0]) in CFG_VALID_PROCESSES_NO_AUTH_NEEDED:
        return user
    if authorization_msg:
        print authorization_msg
        print "=" * len(authorization_msg)
    if user == "":
        print >> sys.stdout, "\rUsername: ",
        try:
            user = sys.stdin.readline().lower().strip()
        except EOFError:
            sys.stderr.write("\n")
            sys.exit(1)
        except KeyboardInterrupt:
            sys.stderr.write("\n")
            sys.exit(1)
    else:
        print >> sys.stdout, "\rUsername:", user
    ## first check user:
    # p_un passed may be an email or a nickname:
    res = run_sql("select id from user where email=%s", (user,), 1) + \
        run_sql("select id from user where nickname=%s", (user,), 1)
    if not res:
        print "Sorry, %s does not exist." % user
        sys.exit(1)
    else:
        uid = res[0][0]
        ok = False
        login_method = get_user_preferences(uid)['login_method']
        if not CFG_EXTERNAL_AUTHENTICATION[login_method]:
            #Local authentication, let's see if we want passwords.
            res = run_sql("select id from user where id=%s "
                "and password=AES_ENCRYPT(email,'')",
                (uid,), 1)
            if res:
                # Empty password configured: no prompt needed.
                ok = True
        if not ok:
            try:
                password_entered = getpass.getpass()
            except EOFError:
                sys.stderr.write("\n")
                sys.exit(1)
            except KeyboardInterrupt:
                sys.stderr.write("\n")
                sys.exit(1)
            if not CFG_EXTERNAL_AUTHENTICATION[login_method]:
                # Local account: compare against the AES-encrypted password.
                res = run_sql("select id from user where id=%s "
                    "and password=AES_ENCRYPT(email, %s)",
                    (uid, password_entered), 1)
                if res:
                    ok = True
            else:
                # Delegate to the configured external authentication plugin.
                if CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(get_email(uid), password_entered):
                    ok = True
        if not ok:
            print "Sorry, wrong credentials for %s." % user
            sys.exit(1)
        else:
            ## secondly check authorization for the authorization_action:
            (auth_code, auth_message) = acc_authorize_action(uid, authorization_action)
            if auth_code != 0:
                print auth_message
                sys.exit(1)
        return user
def _task_submit(argv, authorization_action, authorization_msg):
    """Submits task to the BibSched task queue. This is what people will
    be invoking via command line.

    Inserts a WAITING row in schTASK built from the global _TASK_PARAMS and
    the marshalled *argv*, and returns the new task id.
    """
    ## check as whom we want to submit?
    check_running_process_user()
    ## sanity check: remove eventual "task" option:
    ## authenticate user:
    _TASK_PARAMS['user'] = authenticate(_TASK_PARAMS["user"], authorization_action, authorization_msg)
    ## submit task:
    if _TASK_PARAMS['task_specific_name']:
        # Display name "proc:specific" when a -N/--name was supplied.
        task_name = '%s:%s' % (_TASK_PARAMS['task_name'], _TASK_PARAMS['task_specific_name'])
    else:
        task_name = _TASK_PARAMS['task_name']
    write_message("storing task options %s\n" % argv, verbose=9)
    # Human-readable command line, stored as the initial "progress" value.
    verbose_argv = 'Will execute: %s' % ' '.join([escape_shell_arg(str(arg)) for arg in argv])
    _TASK_PARAMS['task_id'] = run_sql("""INSERT INTO schTASK (proc,user,
runtime,sleeptime,status,progress,arguments,priority,sequenceid)
VALUES (%s,%s,%s,%s,'WAITING',%s,%s,%s,%s)""",
        (task_name, _TASK_PARAMS['user'], _TASK_PARAMS["runtime"],
        _TASK_PARAMS["sleeptime"], verbose_argv, marshal.dumps(argv), _TASK_PARAMS['priority'], _TASK_PARAMS['sequence-id']))
    ## update task number:
    write_message("Task #%d submitted." % _TASK_PARAMS['task_id'])
    return _TASK_PARAMS['task_id']
def _task_get_options(task_id, task_name):
    """Returns options for the task 'id' read from the BibSched task
    queue table.

    Exits the process (status ERROR) when the row is missing or the stored
    arguments cannot be unmarshalled.
    """
    out = {}
    res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc LIKE %s",
                  (task_id, task_name+'%'))
    try:
        out = marshal.loads(res[0][0])
    except Exception:
        # Missing row (IndexError) or corrupted arguments blob.  Narrowed
        # from a bare ``except:`` so KeyboardInterrupt/SystemExit propagate.
        write_message("Error: %s task %d does not seem to exist." \
            % (task_name, task_id), sys.stderr)
        task_update_status('ERROR')
        sys.exit(1)
    write_message('Options retrieved: %s' % (out, ), verbose=9)
    return out
def _task_email_logs():
    """
    In case this was requested, emails the logs.

    Reads 'email_logs_to' from the task params; a no-op when unset.  The
    task's .log and .err files from CFG_LOGDIR are sent as attachments.
    """
    email_logs_to = task_get_task_param('email_logs_to')
    if not email_logs_to:
        return
    status = task_read_status()
    task_name = task_get_task_param('task_name')
    task_specific_name = task_get_task_param('task_specific_name')
    if task_specific_name:
        task_name += ':' + task_specific_name
    runtime = task_get_task_param('runtime')
    title = "Execution of %s: %s" % (task_name, status)
    body = """
Attached you can find the stdout and stderr logs of the execution of
name: %s
id: %s
runtime: %s
options: %s
status: %s
""" % (task_name, _TASK_PARAMS['task_id'], runtime, _OPTIONS, status)
    err_file = os.path.join(CFG_LOGDIR, 'bibsched_task_%d.err' % _TASK_PARAMS['task_id'])
    log_file = os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % _TASK_PARAMS['task_id'])
    return send_email(CFG_SITE_SUPPORT_EMAIL, email_logs_to, title, body, attachments=[(log_file, 'text/plain'), (err_file, 'text/plain')])
def _task_run(task_run_fnc):
    """Runs the task by fetching arguments from the BibSched task queue.
    This is what BibSched will be invoking via daemon call.
    The task prints Fibonacci numbers for up to NUM on the stdout, and some
    messages on stderr.
    @param task_run_fnc: will be called as the main core function. Must return
    False in case of errors.
    Return True in case of success and False in case of failure."""
    from invenio.bibtasklet import _TASKLETS
    ## We prepare the pid file inside /prefix/var/run/taskname_id.pid
    check_running_process_user()
    try:
        pidfile_name = os.path.join(CFG_PREFIX, 'var', 'run',
            'bibsched_task_%d.pid' % _TASK_PARAMS['task_id'])
        pidfile = open(pidfile_name, 'w')
        pidfile.write(str(os.getpid()))
        pidfile.close()
    except OSError:
        register_exception(alert_admin=True)
        task_update_status("ERROR")
        return False
    ## check task status:
    task_status = task_read_status()
    if task_status not in ("WAITING", "SCHEDULED"):
        write_message("Error: The task #%d is %s. I expected WAITING or SCHEDULED." %
            (_TASK_PARAMS['task_id'], task_status), sys.stderr)
        return False
    time_now = datetime.datetime.now()
    # Outside the allowed runtime window (and not a manual CLI run):
    # postpone the task instead of executing it.
    if _TASK_PARAMS['runtime_limit'] is not None and os.environ.get('BIBSCHED_MODE', 'manual') != 'manual':
        if not _TASK_PARAMS['runtime_limit'][0][0] <= time_now <= _TASK_PARAMS['runtime_limit'][0][1]:
            if time_now <= _TASK_PARAMS['runtime_limit'][0][0]:
                new_runtime = _TASK_PARAMS['runtime_limit'][0][0].strftime("%Y-%m-%d %H:%M:%S")
            else:
                new_runtime = _TASK_PARAMS['runtime_limit'][1][0].strftime("%Y-%m-%d %H:%M:%S")
            progress = run_sql("SELECT progress FROM schTASK WHERE id=%s", (_TASK_PARAMS['task_id'], ))
            if progress:
                progress = progress[0][0]
            else:
                progress = ''
            # Count how many times this task was already postponed.
            g = re.match(r'Postponed (\d+) time\(s\)', progress)
            if g:
                postponed_times = int(g.group(1))
            else:
                postponed_times = 0
            if _TASK_PARAMS['sequence-id']:
                ## Also postponing other dependent tasks.
                run_sql("UPDATE schTASK SET runtime=%s, progress=%s WHERE sequenceid=%s AND status='WAITING'", (new_runtime, 'Postponed as task %s' % _TASK_PARAMS['task_id'], _TASK_PARAMS['sequence-id'])) # kwalitee: disable=sql
            run_sql("UPDATE schTASK SET runtime=%s, status='WAITING', progress=%s, host='' WHERE id=%s", (new_runtime, 'Postponed %d time(s)' % (postponed_times + 1), _TASK_PARAMS['task_id'])) # kwalitee: disable=sql
            write_message("Task #%d postponed because outside of runtime limit" % _TASK_PARAMS['task_id'])
            return True
    # Make sure the host field is updated
    # It will not be updated properly when we run
    # a task from the cli (without using the bibsched monitor)
    host = bibsched_get_host(_TASK_PARAMS['task_id'])
    if host and host != gethostname():
        write_message("Error: The task #%d is bound to %s." %
            (_TASK_PARAMS['task_id'], host), sys.stderr)
        return False
    else:
        bibsched_set_host(_TASK_PARAMS['task_id'], gethostname())
    ## initialize signal handler:
    # SIGUSR2 is used as a liveness probe (see get_task_pid) and ignored.
    signal.signal(signal.SIGUSR2, signal.SIG_IGN)
    signal.signal(signal.SIGTSTP, _task_sig_sleep)
    signal.signal(signal.SIGTERM, _task_sig_stop)
    signal.signal(signal.SIGQUIT, _task_sig_stop)
    signal.signal(signal.SIGABRT, _task_sig_suicide)
    signal.signal(signal.SIGINT, _task_sig_stop)
    ## we can run the task now:
    write_message("Task #%d started." % _TASK_PARAMS['task_id'])
    task_update_status("RUNNING")
    ## run the task:
    _TASK_PARAMS['task_starting_time'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    sleeptime = _TASK_PARAMS['sleeptime']
    try:
        try:
            if callable(task_run_fnc) and task_run_fnc():
                task_update_status("DONE")
            else:
                task_update_status("DONE WITH ERRORS")
        except SystemExit:
            # A handler (e.g. _task_sig_stop path) already set the status.
            pass
        except:
            # Deliberately broad: any task failure is logged and reported,
            # and the queue is stopped or continued per configuration.
            write_message(traceback.format_exc()[:-1])
            register_exception(alert_admin=True)
            if task_get_task_param('stop_queue_on_error'):
                task_update_status("ERROR")
            else:
                task_update_status("CERROR")
    finally:
        task_status = task_read_status()
        if sleeptime:
            # Daemon-style task: compute the next runtime and resubmit.
            argv = _task_get_options(_TASK_PARAMS['task_id'], _TASK_PARAMS['task_name'])
            verbose_argv = 'Will execute: %s' % ' '.join([escape_shell_arg(str(arg)) for arg in argv])
            # Here we check if the task can shift away of has to be run at
            # a fixed time
            if task_get_task_param('fixed_time') or _TASK_PARAMS['task_name'] in CFG_BIBTASK_FIXEDTIMETASKS:
                old_runtime = run_sql("SELECT runtime FROM schTASK WHERE id=%s", (_TASK_PARAMS['task_id'], ))[0][0]
            else:
                old_runtime = None
            new_runtime = get_datetime(sleeptime, now=old_runtime)
            ## The task is a daemon. We resubmit it
            if task_status == 'DONE':
                ## It has finished in a good way. We recycle the database row
                run_sql("UPDATE schTASK SET runtime=%s, status='WAITING', progress=%s, host='' WHERE id=%s", (new_runtime, verbose_argv, _TASK_PARAMS['task_id']))
                write_message("Task #%d finished and resubmitted." % _TASK_PARAMS['task_id'])
            elif task_status == 'STOPPED':
                run_sql("UPDATE schTASK SET status='WAITING', progress=%s, host='' WHERE id=%s", (verbose_argv, _TASK_PARAMS['task_id'], ))
                write_message("Task #%d stopped and resubmitted." % _TASK_PARAMS['task_id'])
            else:
                ## We keep the bad result and we resubmit with another id.
                #res = run_sql('SELECT proc,user,sleeptime,arguments,priority FROM schTASK WHERE id=%s', (_TASK_PARAMS['task_id'], ))
                #proc, user, sleeptime, arguments, priority = res[0]
                #run_sql("""INSERT INTO schTASK (proc,user,
                #runtime,sleeptime,status,arguments,priority)
                #VALUES (%s,%s,%s,%s,'WAITING',%s, %s)""",
                #(proc, user, new_runtime, sleeptime, arguments, priority))
                write_message("Task #%d finished but not resubmitted. [%s]" % (_TASK_PARAMS['task_id'], task_status))
        else:
            ## we are done:
            write_message("Task #%d finished. [%s]" % (_TASK_PARAMS['task_id'], task_status))
        ## Removing the pid
        os.remove(pidfile_name)
    #Lets call the post-process tasklets
    if task_get_task_param("post-process"):
        split = re.compile(r"(bst_.*)\[(.*)\]")
        for tasklet in task_get_task_param("post-process"):
            if not split.match(tasklet): # wrong syntax
                _usage(1, "There is an error in the post processing option "
                    "for this task.")
            aux_tasklet = split.match(tasklet)
            # NOTE(review): eval() of operator-supplied tasklet parameters;
            # safe only because --post-process comes from trusted CLI input.
            _TASKLETS[aux_tasklet.group(1)](**eval("dict(%s)" % (aux_tasklet.group(2))))
    return True
def _usage(exitcode=1, msg="", help_specific_usage="", description=""):
    """Prints usage info and exits with *exitcode*.

    @param msg: optional error message printed first.
    @param help_specific_usage: task-specific options help, printed under
        the "Command options:" heading.
    @param description: trailing free-text description, printed last.
    """
    if msg:
        sys.stderr.write("Error: %s.\n" % msg)
    sys.stderr.write("Usage: %s [options]\n" % sys.argv[0])
    if help_specific_usage:
        sys.stderr.write("Command options:\n")
        sys.stderr.write(help_specific_usage)
    sys.stderr.write(" Scheduling options:\n")
    sys.stderr.write(" -u, --user=USER\tUser name under which to submit this"
        " task.\n")
    sys.stderr.write(" -t, --runtime=TIME\tTime to execute the task. [default=now]\n"
        "\t\t\tExamples: +15s, 5m, 3h, 2002-10-27 13:57:26.\n")
    sys.stderr.write(" -s, --sleeptime=SLEEP\tSleeping frequency after"
        " which to repeat the task.\n"
        "\t\t\tExamples: 30m, 2h, 1d. [default=no]\n")
    sys.stderr.write(" --fixed-time\t\tAvoid drifting of execution time when using --sleeptime\n")
    sys.stderr.write(" -I, --sequence-id=SEQUENCE-ID\tSequence Id of the current process\n")
    sys.stderr.write(" -L --limit=LIMIT\tTime limit when it is"
        " allowed to execute the task.\n"
        "\t\t\tExamples: 22:00-03:00, Sunday 01:00-05:00.\n"
        "\t\t\tSyntax: [Wee[kday]] [hh[:mm][-hh[:mm]]].\n")
    sys.stderr.write(" -P, --priority=PRI\tTask priority (0=default, 1=higher, etc).\n")
    sys.stderr.write(" -N, --name=NAME\tTask specific name (advanced option).\n\n")
    sys.stderr.write(" General options:\n")
    sys.stderr.write(" -h, --help\t\tPrint this help.\n")
    sys.stderr.write(" -V, --version\t\tPrint version information.\n")
    sys.stderr.write(" -v, --verbose=LEVEL\tVerbose level (0=min,"
        " 1=default, 9=max).\n")
    sys.stderr.write(" --profile=STATS\tPrint profile information. STATS is a comma-separated\n\t\t\tlist of desired output stats (calls, cumulative,\n\t\t\tfile, line, module, name, nfl, pcalls, stdname, time).\n")
    sys.stderr.write(" --stop-on-error\tIn case of unrecoverable error stop the bibsched queue.\n")
    sys.stderr.write(" --continue-on-error\tIn case of unrecoverable error don't stop the bibsched queue.\n")
    sys.stderr.write(" --post-process=BIB_TASKLET_NAME[parameters]\tPostprocesses the specified\n\t\t\tbibtasklet with the given parameters between square\n\t\t\tbrackets.\n")
    sys.stderr.write("\t\t\tExample:--post-process \"bst_send_email[fromaddr=\n\t\t\t'foo@xxx.com', toaddr='bar@xxx.com', subject='hello',\n\t\t\tcontent='help']\"\n")
    sys.stderr.write(" --email-logs-to=EMAILS Sends an email with the results of the execution\n\t\t\tof the task, and attached the logs (EMAILS could be a comma-\n\t\t\tseparated lists of email addresses)\n")
    if description:
        sys.stderr.write(description)
    sys.exit(exitcode)
def _task_sig_sleep(sig, frame):
    """React to the 'sleep' request (SIGTSTP) sent by BibSched."""
    # Block repeated SIGTSTP deliveries while we wind down.
    signal.signal(signal.SIGTSTP, signal.SIG_IGN)
    details = "task_sig_sleep(), got signal %s frame %s" % (sig, frame)
    write_message(details, verbose=9)
    write_message("sleeping as soon as possible...")
    _db_login(relogin=1)
    task_update_status("ABOUT TO SLEEP")
def _task_sig_stop(sig, frame):
    """React to a 'stop' request (SIGTERM/SIGQUIT/SIGINT) from BibSched."""
    details = "task_sig_stop(), got signal %s frame %s" % (sig, frame)
    write_message(details, verbose=9)
    write_message("stopping as soon as possible...")
    _db_login(relogin=1) # To avoid concurrency with an interrupted run_sql call
    task_update_status("ABOUT TO STOP")
def _task_sig_suicide(sig, frame):
    """React to the 'suicide' request (SIGABRT): terminate immediately."""
    details = "task_sig_suicide(), got signal %s frame %s" % (sig, frame)
    write_message(details, verbose=9)
    write_message("suiciding myself now...")
    task_update_status("SUICIDING")
    write_message("suicided")
    # Re-login before the final status update, then exit non-zero.
    _db_login(relogin=1)
    task_update_status("SUICIDED")
    sys.exit(1)
def _task_sig_dumb(sig, frame):
"""Dumb signal handler."""
pass
_RE_PSLINE = re.compile('^\s*(\w+)\s+(\w+)')
def guess_apache_process_user_from_ps():
    """Guess Apache process user by parsing the list of running processes.

    Returns a (possibly empty) tuple of distinct, non-root usernames owning
    an 'apache'/'apache2'/'httpd' process.
    """
    apache_users = []
    try:
        # Tested on Linux, Sun and MacOS X
        for line in os.popen('ps -A -o user,comm').readlines():
            g = _RE_PSLINE.match(line)
            if g:
                username = g.group(1)
                process = os.path.basename(g.group(2))
                if process in ('apache', 'apache2', 'httpd') :
                    # Deduplicate, and skip root (the master process owner).
                    if username not in apache_users and username != 'root':
                        apache_users.append(username)
    except Exception, e:
        # Best-effort: an unparsable ps output only produces a warning.
        print >> sys.stderr, "WARNING: %s" % e
    return tuple(apache_users)
def guess_apache_process_user():
    """
    Return the possible name of the user running the Apache server process.
    (Look at running OS processes or look at OS users defined in /etc/passwd.)

    Exits the process with an error message when no candidate user exists.
    """
    # Candidates found in the process list first, then common defaults.
    apache_users = guess_apache_process_user_from_ps() + ('apache2', 'apache', 'www-data')
    for username in apache_users:
        try:
            # Keep the first candidate that actually exists in /etc/passwd.
            userline = pwd.getpwnam(username)
            return userline[0]
        except KeyError:
            pass
    print >> sys.stderr, "ERROR: Cannot detect Apache server process user. Please set the correct value in CFG_BIBSCHED_PROCESS_USER."
    sys.exit(1)
def check_running_process_user():
    """
    Check that the user running this program is the same as the user
    configured in CFG_BIBSCHED_PROCESS_USER or as the user running the
    Apache webserver process.

    Exits the process with an explanatory message on mismatch.
    """
    running_as_user = pwd.getpwuid(os.getuid())[0]
    if CFG_BIBSCHED_PROCESS_USER:
        # We have the expected bibsched process user defined in config,
        # so check against her, not against Apache.
        if running_as_user != CFG_BIBSCHED_PROCESS_USER:
            print >> sys.stderr, """ERROR: You must run "%(x_proc)s" as the user set up in your
CFG_BIBSCHED_PROCESS_USER (seems to be "%(x_user)s").
You may want to do "sudo -u %(x_user)s %(x_proc)s ..." to do so.
If you think this is not right, please set CFG_BIBSCHED_PROCESS_USER
appropriately and rerun "inveniocfg --update-config-py".""" % \
                {'x_proc': os.path.basename(sys.argv[0]), 'x_user': CFG_BIBSCHED_PROCESS_USER}
            sys.exit(1)
    elif running_as_user != guess_apache_process_user(): # not defined in config, check against Apache
        print >> sys.stderr, """ERROR: You must run "%(x_proc)s" as the same user that runs your Apache server
process (seems to be "%(x_user)s").
You may want to do "sudo -u %(x_user)s %(x_proc)s ..." to do so.
If you think this is not right, please set CFG_BIBSCHED_PROCESS_USER
appropriately and rerun "inveniocfg --update-config-py".""" % \
            {'x_proc': os.path.basename(sys.argv[0]), 'x_user': guess_apache_process_user()}
        sys.exit(1)
    return
diff --git a/invenio/legacy/bibsched/scripts/bibsched.py b/invenio/legacy/bibsched/scripts/bibsched.py
index 30da4e5d4..49823471b 100644
--- a/invenio/legacy/bibsched/scripts/bibsched.py
+++ b/invenio/legacy/bibsched/scripts/bibsched.py
@@ -1,1827 +1,1827 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibSched - task management, scheduling and executing system for Invenio
"""
__revision__ = "$Id$"
import os
import sys
import time
import re
import marshal
import getopt
from itertools import chain
from socket import gethostname
from subprocess import Popen
import signal
from invenio.bibtask_config import \
CFG_BIBTASK_VALID_TASKS, \
CFG_BIBTASK_MONOTASKS, \
CFG_BIBTASK_FIXEDTIMETASKS
from invenio.config import \
CFG_PREFIX, \
CFG_BIBSCHED_REFRESHTIME, \
CFG_BIBSCHED_LOG_PAGER, \
CFG_BIBSCHED_EDITOR, \
CFG_BINDIR, \
CFG_LOGDIR, \
CFG_BIBSCHED_GC_TASKS_OLDER_THAN, \
CFG_BIBSCHED_GC_TASKS_TO_REMOVE, \
CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE, \
CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, \
CFG_SITE_URL, \
CFG_BIBSCHED_NODE_TASKS, \
CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY
from invenio.dbquery import run_sql, real_escape_string
from invenio.utils.text import wrap_text_in_a_box
-from invenio.errorlib import register_exception, register_emergency
+from invenio.ext.logging import register_exception, register_emergency
from invenio.shellutils import run_shell_command
# Task statuses the bibsched monitor recognises.
# NOTE(review): '% DELETED' is presumably a SQL LIKE pattern matching
# "<timestamp> DELETED" entries -- confirm against the query sites.
CFG_VALID_STATUS = ('WAITING', 'SCHEDULED', 'RUNNING', 'CONTINUING',
    '% DELETED', 'ABOUT TO STOP', 'ABOUT TO SLEEP', 'STOPPED',
    'SLEEPING', 'KILLED', 'NOW STOP', 'ERRORS REPORTED')

# "Message of the day" file displayed by the bibsched monitor.
CFG_MOTD_PATH = os.path.join(CFG_PREFIX, "var", "run", "bibsched.motd")
# Relative time shifts such as "+1d", "30m", "-2h".  Raw string: the previous
# non-raw pattern contained the invalid escape sequences ``\+`` and ``\d``
# (DeprecationWarning/SyntaxWarning on modern Python); the character class
# simplifications are semantically identical.
SHIFT_RE = re.compile(r"([-+]?)(\d+)([dhms])")
# NOTE: StandardError exists only in Python 2 (removed in Python 3).
class RecoverableError(StandardError):
    """Error condition bibsched treats as recoverable.

    Raise/handling sites are elsewhere in this module.
    """
    pass
def get_pager():
    """Return the path of the first pager program available on this system.

    Checks $PAGER, the configured CFG_BIBSCHED_LOG_PAGER, then common
    fallbacks; returns None when none of them exists.
    """
    candidates = (
        os.environ.get('PAGER', ''),
        CFG_BIBSCHED_LOG_PAGER,
        '/usr/bin/less',
        '/bin/more'
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
def get_editor():
    """Return the path of the first text editor available on this system.

    Checks $EDITOR, the configured CFG_BIBSCHED_EDITOR, then common
    fallbacks; returns None when none of them exists.
    """
    candidates = (
        os.environ.get('EDITOR', ''),
        CFG_BIBSCHED_EDITOR,
        '/usr/bin/vim',
        '/usr/bin/emacs',
        '/usr/bin/vi',
        '/usr/bin/nano',
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"):
    """Returns a date string according to the format string.
    It can handle normal date strings and shifts with respect
    to now.

    Returns None when *var* cannot be parsed.
    """
    try:
        date = time.time()
        factors = {"d": 24*3600, "h": 3600, "m": 60, "s": 1}
        m = SHIFT_RE.match(var)
        if m:
            # Relative shift: apply sign * factor * value seconds to now.
            sign = m.groups()[0] == "-" and -1 or 1
            factor = factors[m.groups()[2]]
            value = float(m.groups()[1])
            date = time.localtime(date + sign * factor * value)
            date = time.strftime(format_string, date)
        else:
            # Absolute date: round-trip through strptime to validate it.
            date = time.strptime(var, format_string)
            date = time.strftime(format_string, date)
        return date
    except Exception:
        # Unparsable input (strptime raises ValueError): report as None.
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return None
def get_my_pid(process, args=''):
    """Return the pid of the first running *process* matching *args*, 0 if none."""
    if sys.platform.startswith('freebsd'):
        command = "ps -o pid,args | grep '%s %s' | grep -v 'grep' | sed -n 1p" % (process, args)
    else:
        command = "ps -C %s o '%%p%%a' | grep '%s %s' | grep -v 'grep' | sed -n 1p" % (process, process, args)
    output = run_shell_command(command)[1].strip()
    if not output:
        return 0
    # The line starts with the pid, followed by a space and the command.
    return int(output[:output.find(' ')])
def get_task_pid(task_name, task_id, ignore_error=False):
    """Return the pid of task_name/task_id

    Reads the task's pid file and probes the process with SIGUSR2 (which
    running tasks ignore -- see _task_run); on failure falls back to
    scanning the process list, or returns 0 when *ignore_error* is set.
    """
    try:
        path = os.path.join(CFG_PREFIX, 'var', 'run', 'bibsched_task_%d.pid' % task_id)
        pid = int(open(path).read())
        # Raises OSError when the pid from the file is stale.
        os.kill(pid, signal.SIGUSR2)
        return pid
    except (OSError, IOError):
        if ignore_error:
            return 0
        register_exception()
        return get_my_pid(task_name, str(task_id))
def get_last_taskid():
    """Return the highest task id ever used in the schTASK table."""
    rows = run_sql("SELECT MAX(id) FROM schTASK")
    return rows[0][0]
def delete_task(task_id):
    """Delete the corresponding task (its row in the schTASK table)."""
    run_sql("DELETE FROM schTASK WHERE id=%s", (task_id, ))
def is_task_scheduled(task_name):
    """Tell whether *task_name* currently has a WAITING or RUNNING entry."""
    sql = """SELECT COUNT(proc) FROM schTASK
WHERE proc = %s AND (status='WAITING' OR status='RUNNING')"""
    count = run_sql(sql, (task_name,))[0][0]
    return count > 0
def get_task_ids_by_descending_date(task_name, statuses=None):
    """Returns list of task ids, ordered by descending runtime.

    :param statuses: sequence of status strings to match; defaults to
        ['SCHEDULED'].  Statuses are interpolated into the SQL (as in
        gc_tasks), so only internal constants should ever be passed.
    """
    if statuses is None:
        # Default handled here to avoid the mutable-default-argument pitfall.
        statuses = ['SCHEDULED']
    # ``%%s`` survives the ``%`` formatting as ``%s`` so run_sql can bind
    # task_name afterwards; the previous single ``%s`` made the format
    # operation itself fail (two placeholders, one value).
    sql = """SELECT id FROM schTASK
WHERE proc=%%s AND (%s)
ORDER BY runtime DESC""" \
        % " OR ".join(["status = '%s'" % x for x in statuses])
    return [x[0] for x in run_sql(sql, (task_name,))]
def get_task_options(task_id):
    """Returns options for task_id read from the BibSched task queue table.

    An empty list is returned when no such task exists.
    """
    rows = run_sql("SELECT arguments FROM schTASK WHERE id=%s", (task_id,))
    if not rows:
        return list()
    return marshal.loads(rows[0][0])
def gc_tasks(verbose=False, statuses=None, since=None, tasks=None): # pylint: disable=W0613
    """Garbage collect the task queue.

    :param verbose: accepted for API compatibility; currently unused.
    :param statuses: statuses to collect (default ['DONE']); RUNNING is
        always excluded for safety.
    :param since: relative age expression understood by get_datetime()
        (default: '-<CFG_BIBSCHED_GC_TASKS_OLDER_THAN>d').
    :param tasks: task names to collect; names listed in
        CFG_BIBSCHED_GC_TASKS_TO_REMOVE are deleted, names in
        CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE are copied to hstTASK first.
    """
    if tasks is None:
        tasks = CFG_BIBSCHED_GC_TASKS_TO_REMOVE + CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE
    if since is None:
        # e.g. '-30d': only touch tasks older than the configured cutoff.
        since = '-%id' % CFG_BIBSCHED_GC_TASKS_OLDER_THAN
    if statuses is None:
        statuses = ['DONE']
    # Never garbage-collect a task that is still running.
    statuses = [status.upper() for status in statuses if status.upper() != 'RUNNING']
    date = get_datetime(since)
    # Statuses are inlined into the SQL (escaped), while proc/date stay
    # bound parameters — hence the %%s placeholders below.
    status_query = 'status in (%s)' % ','.join([repr(real_escape_string(status)) for status in statuses])
    for task in tasks:
        if task in CFG_BIBSCHED_GC_TASKS_TO_REMOVE:
            res = run_sql("""DELETE FROM schTASK WHERE proc=%%s AND %s AND
                             runtime<%%s""" % status_query, (task, date))
            write_message('Deleted %s %s tasks (created before %s) with %s'
                          % (res, task, date, status_query))
        elif task in CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE:
            # Copy into the history table first, then delete: the order of
            # these two statements must not change.
            run_sql("""INSERT INTO hstTASK(id,proc,host,user,
                       runtime,sleeptime,arguments,status,progress)
                       SELECT id,proc,host,user,
                       runtime,sleeptime,arguments,status,progress
                       FROM schTASK WHERE proc=%%s AND %s AND
                       runtime<%%s""" % status_query, (task, date))
            res = run_sql("""DELETE FROM schTASK WHERE proc=%%s AND %s AND
                             runtime<%%s""" % status_query, (task, date))
            write_message('Archived %s %s tasks (created before %s) with %s'
                          % (res, task, date, status_query))
def spawn_task(command, wait=False):
    """
    Spawn the provided command in a way that is detached from the current
    group. In this way a signal received by bibsched is not going to be
    automatically propagated to the spawned process.

    :param command: shell command line to run (executed with shell=True).
    :param wait: when True, block until the spawned process exits.
    """
    def preexec(): # Don't forward signals.
        os.setsid()
    devnull = open(os.devnull, "w")
    try:
        process = Popen(command, preexec_fn=preexec, shell=True,
                        stderr=devnull, stdout=devnull)
    finally:
        # The child keeps its own duplicated descriptors, so the parent's
        # copy must be closed — previously it leaked one fd per task.
        devnull.close()
    if wait:
        process.wait()
def bibsched_get_host(task_id):
    """Return the hostname recorded for the task, or None if unknown."""
    rows = run_sql("SELECT host FROM schTASK WHERE id=%s LIMIT 1", (task_id, ), 1)
    return rows[0][0] if rows else None
def bibsched_set_host(task_id, host=""):
    """Record ``host`` as the machine responsible for task_id."""
    return run_sql("UPDATE schTASK SET host=%s WHERE id=%s", (host, task_id))
def bibsched_get_status(task_id):
    """Return the queue status of the task, or None if it does not exist."""
    rows = run_sql("SELECT status FROM schTASK WHERE id=%s LIMIT 1", (task_id, ), 1)
    return rows[0][0] if rows else None
def bibsched_set_status(task_id, status, when_status_is=None):
    """Set the status of task_id, optionally only if its current status
    equals ``when_status_is`` (a cheap compare-and-swap)."""
    if when_status_is is not None:
        return run_sql("UPDATE schTASK SET status=%s WHERE id=%s AND status=%s",
                       (status, task_id, when_status_is))
    return run_sql("UPDATE schTASK SET status=%s WHERE id=%s",
                   (status, task_id))
def bibsched_set_progress(task_id, progress):
    """Store the human-readable progress string of task_id."""
    return run_sql("UPDATE schTASK SET progress=%s WHERE id=%s", (progress, task_id))
def bibsched_set_priority(task_id, priority):
    """Store the scheduling priority of task_id."""
    return run_sql("UPDATE schTASK SET priority=%s WHERE id=%s", (priority, task_id))
def bibsched_send_signal(proc, task_id, sig):
    """Deliver ``sig`` to the process running the given task.

    Returns True on success; False when the task belongs to another host,
    no pid could be determined, or the kill itself failed.
    """
    if bibsched_get_host(task_id) != gethostname():
        # The task runs on a different node; signalling locally is useless.
        return False
    pid = get_task_pid(proc, task_id, True)
    if not pid:
        return False
    try:
        os.kill(pid, sig)
    except OSError:
        return False
    return True
def is_monotask(task_id, proc, runtime, status, priority, host, sequenceid): # pylint: disable=W0613
    """Return True when the task's process type must run exclusively."""
    return proc.split(':')[0] in CFG_BIBTASK_MONOTASKS
def stop_task(other_task_id, other_proc, other_priority, other_status, other_sequenceid): # pylint: disable=W0613
    """Flag a task ABOUT TO STOP so it stops at its next checkpoint."""
    Log("Send STOP signal to #%d (%s) which was in status %s"
        % (other_task_id, other_proc, other_status))
    bibsched_set_status(other_task_id, 'ABOUT TO STOP', other_status)
def sleep_task(other_task_id, other_proc, other_priority, other_status, other_sequenceid): # pylint: disable=W0613
    """Flag a task ABOUT TO SLEEP so it pauses at its next checkpoint."""
    Log("Send SLEEP signal to #%d (%s) which was in status %s"
        % (other_task_id, other_proc, other_status))
    bibsched_set_status(other_task_id, 'ABOUT TO SLEEP', other_status)
class Manager(object):
    """Interactive curses front-end for monitoring and steering the
    BibSched task queue.

    Constructing an instance takes over the terminal (via curses.wrapper)
    and returns only when the user quits the monitor.  The widget keeps a
    cache of queue rows in ``self.rows`` and redraws from it; DB access
    happens in update_rows(), all command keys in handle_keys().
    """

    def __init__(self, old_stdout):
        # curses is imported lazily, so importing this module does not
        # require a terminal.
        import curses
        import curses.panel
        from curses.wrapper import wrapper
        self.old_stdout = old_stdout  # real stdout, used while curses owns the tty
        self.curses = curses
        self.helper_modules = CFG_BIBTASK_VALID_TASKS
        self.running = 1  # main loop flag; cleared by the Q key
        self.footer_auto_mode = "Automatic Mode [A Manual] [1/2/3 Display] [P Purge] [l/L Log] [O Opts] [E Edit motd] [Q Quit]"
        self.footer_select_mode = "Manual Mode [A Automatic] [1/2/3 Display Type] [P Purge] [l/L Log] [O Opts] [E Edit motd] [Q Quit]"
        self.footer_waiting_item = "[R Run] [D Delete] [N Priority]"
        self.footer_running_item = "[S Sleep] [T Stop] [K Kill]"
        self.footer_stopped_item = "[I Initialise] [D Delete] [K Acknowledge]"
        self.footer_sleeping_item = "[W Wake Up] [T Stop] [K Kill]"
        self.item_status = ""       # status string of the highlighted row
        self.rows = []              # cached queue rows (see update_rows)
        self.panel = None           # currently displayed popup panel, if any
        self.display = 2            # 1=done, 2=not done, 3=archived
        self.first_visible_line = 0
        self.auto_mode = 0          # 1 when the bibsched daemon is running
        self.currentrow = None      # full row tuple of the highlighted task
        self.current_attr = 0
        self.hostname = gethostname()
        self.allowed_task_types = CFG_BIBSCHED_NODE_TASKS.get(self.hostname, CFG_BIBTASK_VALID_TASKS)
        self.motd = ""
        self.header_lines = 2       # 3 when a motd banner is shown
        self.read_motd()
        self.selected_line = self.header_lines
        # Blocks here until the user quits the monitor.
        wrapper(self.start)

    def read_motd(self):
        """Get a fresh motd from disk, if it exists."""
        self.motd = ""
        self.header_lines = 2
        try:
            if os.path.exists(CFG_MOTD_PATH):
                motd = open(CFG_MOTD_PATH).read().strip()
                if motd:
                    # Prefix with the file's mtime; edit_motd() relies on
                    # this fixed-width "MOTD [....] " prefix.
                    self.motd = "MOTD [%s] " % time.strftime("%Y-%m-%d %H:%M", time.localtime(os.path.getmtime(CFG_MOTD_PATH))) + motd
                    self.header_lines = 3
        except IOError:
            pass

    def handle_keys(self, char):
        """Dispatch one keypress (curses keycode) to the matching action."""
        if char == -1:
            return
        # In automatic mode only navigation/display/quit keys are allowed;
        # everything else just reports "in automatic mode".
        if self.auto_mode and (char not in (self.curses.KEY_UP,
                                            self.curses.KEY_DOWN,
                                            self.curses.KEY_PPAGE,
                                            self.curses.KEY_NPAGE,
                                            ord("g"), ord("G"), ord("n"),
                                            ord("q"), ord("Q"), ord("a"),
                                            ord("A"), ord("1"), ord("2"), ord("3"),
                                            ord("p"), ord("P"), ord("o"), ord("O"),
                                            ord("l"), ord("L"), ord("e"), ord("E"))):
            self.display_in_footer("in automatic mode")
        else:
            status = self.currentrow and self.currentrow[5] or None
            # NOTE(review): the next two branches are two independent
            # `if`s, not `if`/`elif` — the elif-chain below hangs off the
            # KEY_PPAGE test.  Behavior is unchanged because the key codes
            # are mutually exclusive.
            if char == self.curses.KEY_UP:
                self.selected_line = max(self.selected_line - 1,
                                         self.header_lines)
                self.repaint()
            if char == self.curses.KEY_PPAGE:
                self.selected_line = max(self.selected_line - 10,
                                         self.header_lines)
                self.repaint()
            elif char == self.curses.KEY_DOWN:
                self.selected_line = min(self.selected_line + 1,
                                         len(self.rows) + self.header_lines - 1)
                self.repaint()
            elif char == self.curses.KEY_NPAGE:
                self.selected_line = min(self.selected_line + 10,
                                         len(self.rows) + self.header_lines - 1)
                self.repaint()
            elif char == self.curses.KEY_HOME:
                self.first_visible_line = 0
                self.selected_line = self.header_lines
            elif char == ord("g"):
                self.selected_line = self.header_lines
                self.repaint()
            elif char == ord("G"):
                self.selected_line = len(self.rows) + self.header_lines - 1
                self.repaint()
            elif char in (ord("a"), ord("A")):
                self.change_auto_mode()
            elif char == ord("l"):
                self.openlog()
            elif char == ord("L"):
                self.openlog(err=True)
            elif char in (ord("w"), ord("W")):
                self.wakeup()
            elif char in (ord("n"), ord("N")):
                self.change_priority()
            elif char in (ord("r"), ord("R")):
                if status in ('WAITING', 'SCHEDULED'):
                    self.run()
            elif char in (ord("s"), ord("S")):
                self.sleep()
            elif char in (ord("k"), ord("K")):
                # K acknowledges error states, kills anything else.
                if status in ('ERROR', 'DONE WITH ERRORS', 'ERRORS REPORTED'):
                    self.acknowledge()
                elif status is not None:
                    self.kill()
            elif char in (ord("t"), ord("T")):
                self.stop()
            elif char in (ord("d"), ord("D")):
                self.delete()
            elif char in (ord("i"), ord("I")):
                self.init()
            elif char in (ord("p"), ord("P")):
                self.purge_done()
            elif char in (ord("o"), ord("O")):
                self.display_task_options()
            elif char in (ord("e"), ord("E")):
                self.edit_motd()
                self.read_motd()
            elif char == ord("1"):
                self.display = 1
                self.first_visible_line = 0
                self.selected_line = self.header_lines
                # We need to update the display to display done tasks
                self.update_rows()
                self.repaint()
                self.display_in_footer("only done processes are displayed")
            elif char == ord("2"):
                self.display = 2
                self.first_visible_line = 0
                self.selected_line = self.header_lines
                # We need to update the display to display not done tasks
                self.update_rows()
                self.repaint()
                self.display_in_footer("only not done processes are displayed")
            elif char == ord("3"):
                self.display = 3
                self.first_visible_line = 0
                self.selected_line = self.header_lines
                # We need to update the display to display archived tasks
                self.update_rows()
                self.repaint()
                self.display_in_footer("only archived processes are displayed")
            elif char in (ord("q"), ord("Q")):
                # Q first closes an open popup panel; a second Q quits.
                if self.curses.panel.top_panel() == self.panel:
                    self.panel = None
                    self.curses.panel.update_panels()
                else:
                    self.running = 0
        return

    def openlog(self, err=False):
        """Show the selected task's .log (or .err when ``err``) in a pager."""
        task_id = self.currentrow[0]
        if err:
            logname = os.path.join(CFG_LOGDIR, 'bibsched_task_%d.err' % task_id)
        else:
            logname = os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % task_id)
        if os.path.exists(logname):
            pager = get_pager()
            if os.path.exists(pager):
                # Leave curses mode while the external pager owns the tty.
                self.curses.endwin()
                os.system('%s %s' % (pager, logname))
                print >> self.old_stdout, "\rPress ENTER to continue",
                self.old_stdout.flush()
                raw_input()
                # We need to redraw the bibsched task list
                # since we are displaying "Press ENTER to continue"
                self.repaint()
            else:
                self._display_message_box("No pager was found")

    def edit_motd(self):
        """Add, delete or change the motd message that will be shown when the
        bibsched monitor starts."""
        editor = get_editor()
        if editor:
            previous = self.motd
            self.curses.endwin()
            os.system("%s %s" % (editor, CFG_MOTD_PATH))
            # We need to redraw the MOTD part
            self.read_motd()
            self.repaint()
            # [24:] skips the fixed-width "MOTD [YYYY-mm-dd HH:MM] " prefix
            # so only the message body is compared.
            if previous[24:] != self.motd[24:]:
                if len(previous) == 0:
                    Log('motd set to "%s"' % self.motd.replace("\n", "|"))
                    # The banner adds one header line; keep the highlight
                    # on the same task row.
                    self.selected_line += 1
                    self.header_lines += 1
                elif len(self.motd) == 0:
                    Log('motd deleted')
                    self.selected_line -= 1
                    self.header_lines -= 1
                else:
                    Log('motd changed to "%s"' % self.motd.replace("\n", "|"))
        else:
            self._display_message_box("No editor was found")

    def display_task_options(self):
        """Nicely display information about current process."""
        msg = ' id: %i\n\n' % self.currentrow[0]
        pid = get_task_pid(self.currentrow[1], self.currentrow[0], True)
        if pid is not None:
            msg += ' pid: %s\n\n' % pid
        msg += ' priority: %s\n\n' % self.currentrow[8]
        msg += ' proc: %s\n\n' % self.currentrow[1]
        msg += ' user: %s\n\n' % self.currentrow[2]
        msg += ' runtime: %s\n\n' % self.currentrow[3].strftime("%Y-%m-%d %H:%M:%S")
        msg += ' sleeptime: %s\n\n' % self.currentrow[4]
        msg += ' status: %s\n\n' % self.currentrow[5]
        msg += ' progress: %s\n\n' % self.currentrow[6]
        arguments = marshal.loads(self.currentrow[7])
        if type(arguments) is dict:
            # FIXME: REMOVE AFTER MAJOR RELEASE 1.0
            msg += ' options : %s\n\n' % arguments
        else:
            msg += 'executable : %s\n\n' % arguments[0]
            msg += ' arguments : %s\n\n' % ' '.join(arguments[1:])
        msg += '\n\nPress q to quit this panel...'
        msg = wrap_text_in_a_box(msg, style='no_border')
        rows = msg.split('\n')
        height = len(rows) + 2
        width = max([len(row) for row in rows]) + 4
        try:
            # Centre the popup; py2 integer division is intended here.
            self.win = self.curses.newwin(
                height,
                width,
                (self.height - height) / 2 + 1,
                (self.width - width) / 2 + 1
            )
        except self.curses.error:
            return
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        while self.win.getkey() != 'q':
            pass
        self.panel = None

    def count_processes(self, status):
        """Return how many queue entries currently have ``status``."""
        out = 0
        res = run_sql("""SELECT COUNT(id) FROM schTASK
                         WHERE status=%s GROUP BY status""", (status,))
        try:
            out = res[0][0]
        except:
            # No row for this status: keep the 0 default.
            pass
        return out

    def change_priority(self):
        """Prompt for and store a new priority for the selected task."""
        task_id = self.currentrow[0]
        priority = self.currentrow[8]
        new_priority = self._display_ask_number_box("Insert the desired \
priority for task %s. The smaller the number the less the priority. Note that \
a number less than -10 will mean to always postpone the task while a number \
bigger than 10 will mean some tasks with less priority could be stopped in \
order to let this task run. The current priority is %s. New value:"
                                                   % (task_id, priority))
        try:
            new_priority = int(new_priority)
        except ValueError:
            return
        bibsched_set_priority(task_id, new_priority)
        # We need to update the tasks list with our new priority
        # to be able to display it
        self.update_rows()
        # We need to update the priority number next to the task
        self.repaint()

    def wakeup(self):
        """Wake a SLEEPING task by sending it SIGCONT."""
        task_id = self.currentrow[0]
        process = self.currentrow[1]
        status = self.currentrow[5]
        #if self.count_processes('RUNNING') + self.count_processes('CONTINUING') >= 1:
            #self.display_in_footer("a process is already running!")
        if status == "SLEEPING":
            if not bibsched_send_signal(process, task_id, signal.SIGCONT):
                # Signal delivery failed: the process is gone, flag ERROR.
                bibsched_set_status(task_id, "ERROR", "SLEEPING")
            self.update_rows()
            self.repaint()
            self.display_in_footer("process woken up")
        else:
            self.display_in_footer("process is not sleeping")
        self.stdscr.refresh()

    def _display_YN_box(self, msg):
        """Utility to display confirmation boxes."""
        msg += ' (Y/N)'
        msg = wrap_text_in_a_box(msg, style='no_border')
        rows = msg.split('\n')
        height = len(rows) + 2
        width = max([len(row) for row in rows]) + 4
        self.win = self.curses.newwin(
            height,
            width,
            (self.height - height) / 2 + 1,
            (self.width - width) / 2 + 1
        )
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        try:
            # Loop until a definite y/n answer is given.
            while 1:
                c = self.win.getch()
                if c in (ord('y'), ord('Y')):
                    return True
                elif c in (ord('n'), ord('N')):
                    return False
        finally:
            self.panel = None

    def _display_ask_number_box(self, msg):
        """Utility to display confirmation boxes."""
        msg = wrap_text_in_a_box(msg, style='no_border')
        rows = msg.split('\n')
        height = len(rows) + 3
        width = max([len(row) for row in rows]) + 4
        self.win = self.curses.newwin(
            height,
            width,
            (self.height - height) / 2 + 1,
            (self.width - width) / 2 + 1
        )
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        self.win.move(height - 2, 2)
        # Echo the typed digits back while reading the answer.
        self.curses.echo()
        ret = self.win.getstr()
        self.curses.noecho()
        self.panel = None
        return ret

    def _display_message_box(self, msg):
        """Utility to display message boxes."""
        rows = msg.split('\n')
        height = len(rows) + 2
        width = max([len(row) for row in rows]) + 3
        self.win = self.curses.newwin(
            height,
            width,
            (self.height - height) / 2 + 1,
            (self.width - width) / 2 + 1
        )
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        self.win.move(height - 2, 2)
        self.win.getkey()
        self.curses.noecho()
        self.panel = None

    def purge_done(self):
        """Garbage collector."""
        if self._display_YN_box(
            "You are going to purge the list of DONE tasks.\n\n"
            "%s tasks, submitted since %s days, will be archived.\n\n"
            "%s tasks, submitted since %s days, will be deleted.\n\n"
            "Are you sure?" % (
                ', '.join(CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE),
                CFG_BIBSCHED_GC_TASKS_OLDER_THAN,
                ', '.join(CFG_BIBSCHED_GC_TASKS_TO_REMOVE),
                CFG_BIBSCHED_GC_TASKS_OLDER_THAN)):
            gc_tasks()
            # We removed some tasks from our list
            self.update_rows()
            self.repaint()
            self.display_in_footer("DONE processes purged")

    def run(self):
        """Manually launch the selected WAITING task on this host."""
        task_id = self.currentrow[0]
        process = self.currentrow[1].split(':')[0]
        status = self.currentrow[5]
        if status == "WAITING":
            if process in self.helper_modules:
                # Atomically claim the task; failure means someone (or a
                # repeated keypress) already scheduled it.
                if run_sql("""UPDATE schTASK SET status='SCHEDULED', host=%s
                              WHERE id=%s and status='WAITING'""",
                           (self.hostname, task_id)):
                    program = os.path.join(CFG_BINDIR, process)
                    command = "%s %s" % (program, str(task_id))
                    spawn_task(command)
                    Log("manually running task #%d (%s)" % (task_id, process))
                    # We changed the status of one of our tasks
                    self.update_rows()
                    self.repaint()
                else:
                    ## Process already running (typing too quickly on the keyboard?)
                    pass
            else:
                self.display_in_footer("Process %s is not in the list of allowed processes." % process)
        else:
            self.display_in_footer("Process status should be SCHEDULED or WAITING!")

    def acknowledge(self):
        """Mark the selected errored task as acknowledged (ACK <status>)."""
        task_id = self.currentrow[0]
        status = self.currentrow[5]
        if status in ('ERROR', 'DONE WITH ERRORS', 'ERRORS REPORTED'):
            bibsched_set_status(task_id, 'ACK ' + status, status)
            self.update_rows()
            self.repaint()
            self.display_in_footer("Acknowledged error")

    def sleep(self):
        """Ask the selected running task to go to sleep."""
        task_id = self.currentrow[0]
        status = self.currentrow[5]
        if status in ('RUNNING', 'CONTINUING'):
            bibsched_set_status(task_id, 'ABOUT TO SLEEP', status)
            self.update_rows()
            self.repaint()
            self.display_in_footer("SLEEP signal sent to task #%s" % task_id)
        else:
            self.display_in_footer("Cannot put to sleep non-running processes")

    def kill(self):
        """SIGKILL the selected task after user confirmation."""
        task_id = self.currentrow[0]
        process = self.currentrow[1]
        status = self.currentrow[5]
        if status in ('RUNNING', 'CONTINUING', 'ABOUT TO STOP', 'ABOUT TO SLEEP', 'SLEEPING'):
            if self._display_YN_box("Are you sure you want to kill the %s process %s?" % (process, task_id)):
                bibsched_send_signal(process, task_id, signal.SIGKILL)
                bibsched_set_status(task_id, 'KILLED')
                self.update_rows()
                self.repaint()
                self.display_in_footer("KILL signal sent to task #%s" % task_id)
        else:
            self.display_in_footer("Cannot kill non-running processes")

    def stop(self):
        """Ask the selected task to stop; wakes a sleeping task first."""
        task_id = self.currentrow[0]
        process = self.currentrow[1]
        status = self.currentrow[5]
        if status in ('RUNNING', 'CONTINUING', 'ABOUT TO SLEEP', 'SLEEPING'):
            if status == 'SLEEPING':
                # A sleeping task must be woken (SIGCONT) so it can notice
                # the NOW STOP flag; poll for up to 10 refresh intervals.
                bibsched_set_status(task_id, 'NOW STOP', 'SLEEPING')
                bibsched_send_signal(process, task_id, signal.SIGCONT)
                count = 10
                while bibsched_get_status(task_id) == 'NOW STOP':
                    if count <= 0:
                        bibsched_set_status(task_id, 'ERROR', 'NOW STOP')
                        self.update_rows()
                        self.repaint()
                        self.display_in_footer("It seems impossible to wakeup this task.")
                        return
                    time.sleep(CFG_BIBSCHED_REFRESHTIME)
                    count -= 1
            else:
                bibsched_set_status(task_id, 'ABOUT TO STOP', status)
            self.update_rows()
            self.repaint()
            self.display_in_footer("STOP signal sent to task #%s" % task_id)
        else:
            self.display_in_footer("Cannot stop non-running processes")

    def delete(self):
        """Soft-delete the selected task (status becomes <status>_DELETED)."""
        task_id = self.currentrow[0]
        status = self.currentrow[5]
        if status not in ('RUNNING', 'CONTINUING', 'SLEEPING', 'SCHEDULED', 'ABOUT TO STOP', 'ABOUT TO SLEEP'):
            bibsched_set_status(task_id, "%s_DELETED" % status, status)
            self.display_in_footer("process deleted")
            self.update_rows()
            self.repaint()
        else:
            self.display_in_footer("Cannot delete running processes")

    def init(self):
        """Reset the selected task back to WAITING with empty progress/host."""
        task_id = self.currentrow[0]
        status = self.currentrow[5]
        if status not in ('RUNNING', 'CONTINUING', 'SLEEPING'):
            bibsched_set_status(task_id, "WAITING")
            bibsched_set_progress(task_id, "")
            bibsched_set_host(task_id, "")
            self.update_rows()
            self.repaint()
            self.display_in_footer("process initialised")
        else:
            self.display_in_footer("Cannot initialise running processes")

    def change_auto_mode(self):
        """Toggle automatic mode by starting/halting the bibsched daemon."""
        program = os.path.join(CFG_BINDIR, "bibsched")
        if self.auto_mode:
            COMMAND = "%s -q halt" % program
        else:
            COMMAND = "%s -q start" % program
        os.system(COMMAND)
        self.auto_mode = not self.auto_mode
        # We need to refresh the color of the header and footer
        self.repaint()

    def put_line(self, row, header=False, motd=False):
        """Render one row (task, header or motd) at the current self.y and
        advance self.y.  Also captures the highlighted row's data."""
        ## ROW: (id,proc,user,runtime,sleeptime,status,progress,arguments,priority,host)
        ##       0  1    2    3       4         5      6        7         8        9
        col_w = [8 , 25, 15, 21, 7, 12, 21, 60]
        maxx = self.width
        if self.y == self.selected_line - self.first_visible_line and self.y > 1:
            self.item_status = row[5]
            self.currentrow = row
        if motd:
            attr = self.curses.color_pair(1) + self.curses.A_BOLD
        elif self.y == self.header_lines - 2:
            if self.auto_mode:
                attr = self.curses.color_pair(2) + self.curses.A_STANDOUT + self.curses.A_BOLD
            else:
                attr = self.curses.color_pair(8) + self.curses.A_STANDOUT + self.curses.A_BOLD
        elif row[5] == "DONE":
            attr = self.curses.color_pair(5) + self.curses.A_BOLD
        elif row[5] == "STOPPED":
            attr = self.curses.color_pair(6) + self.curses.A_BOLD
        elif row[5].find("ERROR") > -1:
            attr = self.curses.color_pair(4) + self.curses.A_BOLD
        elif row[5] == "WAITING":
            attr = self.curses.color_pair(3) + self.curses.A_BOLD
        elif row[5] in ("RUNNING", "CONTINUING"):
            attr = self.curses.color_pair(2) + self.curses.A_BOLD
        elif not header and row[8]:
            attr = self.curses.A_BOLD
        else:
            attr = self.curses.A_NORMAL
        ## If the task is not relevant for this instance ob BibSched because
        ## the type of the task can not be run, or it is running on another
        ## machine: make it a different color
        if not header and (row[1].split(':')[0] not in self.allowed_task_types or
                           (row[9] != '' and row[9] != self.hostname)):
            attr = self.curses.color_pair(6)
            if not row[6]:
                nrow = list(row)
                nrow[6] = 'Not allowed on this instance'
                row = tuple(nrow)
        if self.y == self.selected_line - self.first_visible_line and self.y > 1:
            self.current_attr = attr
            attr += self.curses.A_REVERSE
        if header: # Dirty hack. put_line should be better refactored.
            # row contains one less element: arguments
            ## !!! FIXME: THIS IS CRAP
            myline = str(row[0]).ljust(col_w[0]-1)
            myline += str(row[1]).ljust(col_w[1]-1)
            myline += str(row[2]).ljust(col_w[2]-1)
            myline += str(row[3]).ljust(col_w[3]-1)
            myline += str(row[4]).ljust(col_w[4]-1)
            myline += str(row[5]).ljust(col_w[5]-1)
            myline += str(row[6]).ljust(col_w[6]-1)
            myline += str(row[7]).ljust(col_w[7]-1)
        elif motd:
            myline = str(row[0])
        else:
            ## ROW: (id,proc,user,runtime,sleeptime,status,progress,arguments,priority,host)
            ##       0  1    2    3       4         5      6        7         8        9
            priority = str(row[8] and ' [%s]' % row[8] or '')
            myline = str(row[0]).ljust(col_w[0])[:col_w[0]-1]
            myline += (str(row[1])[:col_w[1]-len(priority)-2] + priority).ljust(col_w[1]-1)
            myline += str(row[2]).ljust(col_w[2])[:col_w[2]-1]
            myline += str(row[3]).ljust(col_w[3])[:col_w[3]-1]
            myline += str(row[4]).ljust(col_w[4])[:col_w[4]-1]
            myline += str(row[5]).ljust(col_w[5])[:col_w[5]-1]
            myline += str(row[9]).ljust(col_w[6])[:col_w[6]-1]
            myline += str(row[6]).ljust(col_w[7])[:col_w[7]-1]
        myline = myline.ljust(maxx)
        try:
            self.stdscr.addnstr(self.y, 0, myline, maxx, attr)
        except self.curses.error:
            pass
        self.y += 1

    def display_in_footer(self, footer, i=0, print_time_p=0):
        """Write ``footer`` on the (self.y - i)-th screen line, optionally
        suffixed with the current timestamp."""
        if print_time_p:
            footer = "%s %s" % (footer, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        maxx = self.stdscr.getmaxyx()[1]
        footer = footer.ljust(maxx)
        if self.auto_mode:
            colorpair = 2
        else:
            colorpair = 1
        try:
            self.stdscr.addnstr(self.y - i, 0, footer, maxx - 1, self.curses.A_STANDOUT + self.curses.color_pair(colorpair) + self.curses.A_BOLD)
        except self.curses.error:
            pass

    def repaint(self):
        """Redraw the whole screen from the cached rows."""
        # auto_mode mirrors whether the bibsched daemon is alive.
        if server_pid():
            self.auto_mode = 1
        else:
            if self.auto_mode == 1:
                self.curses.beep()
            self.auto_mode = 0
        self.y = 0
        self.stdscr.erase()
        self.height, self.width = self.stdscr.getmaxyx()
        maxy = self.height - 2
        #maxx = self.width
        if len(self.motd) > 0:
            self.put_line((self.motd.strip().replace("\n", " - ")[:79], "", "", "", "", "", "", "", ""), header=False, motd=True)
        self.put_line(("ID", "PROC [PRI]", "USER", "RUNTIME", "SLEEP", "STATUS", "HOST", "PROGRESS"), header=True)
        self.put_line(("", "", "", "", "", "", "", ""), header=True)
        # Keep the highlighted line inside the visible window.
        if self.selected_line > maxy + self.first_visible_line - 1:
            self.first_visible_line = self.selected_line - maxy + 1
        if self.selected_line < self.first_visible_line + 2:
            self.first_visible_line = self.selected_line - 2
        for row in self.rows[self.first_visible_line:self.first_visible_line+maxy-2]:
            self.put_line(row)
        self.y = self.stdscr.getmaxyx()[0] - 1
        if self.auto_mode:
            self.display_in_footer(self.footer_auto_mode, print_time_p=1)
        else:
            self.display_in_footer(self.footer_select_mode, print_time_p=1)
            # Second footer line: commands available for the highlighted task.
            footer2 = ""
            if self.item_status.find("DONE") > -1 or self.item_status in ("ERROR", "STOPPED", "KILLED", "ERRORS REPORTED"):
                footer2 += self.footer_stopped_item
            elif self.item_status in ("RUNNING", "CONTINUING", "ABOUT TO STOP", "ABOUT TO SLEEP"):
                footer2 += self.footer_running_item
            elif self.item_status == "SLEEPING":
                footer2 += self.footer_sleeping_item
            elif self.item_status == "WAITING":
                footer2 += self.footer_waiting_item
            self.display_in_footer(footer2, 1)
        self.stdscr.refresh()

    def update_rows(self):
        """Refresh self.rows from the DB according to the display mode
        (1 = done, 2 = not done, 3 = archived history)."""
        if self.display == 1:
            table = "schTASK"
            where = "and (status='DONE' or status LIKE 'ACK%')"
            order = "runtime DESC"
            limit = ""
        elif self.display == 2:
            table = "schTASK"
            where = "and (status<>'DONE' and status NOT LIKE 'ACK%')"
            order = "runtime ASC"
            # NOTE(review): this reuses the archived-rows display limit for
            # the not-done view — presumably intentional, but confirm.
            limit = "limit %s" % CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY
        else:
            table = "hstTASK"
            order = "runtime DESC"
            where = ""
            limit = ""
        self.rows = run_sql("""SELECT id, proc, user, runtime, sleeptime,
                               status, progress, arguments, priority, host,
                               sequenceid
                               FROM %s
                               WHERE status NOT LIKE '%%_DELETED' %s
                               ORDER BY %s
                               %s""" % (table, where, order, limit))
        # Make sure we are not selecting a line that disappeared
        self.selected_line = min(self.selected_line,
                                 len(self.rows) + self.header_lines - 1)

    def start(self, stdscr):
        """Main curses loop: initialise colors, then poll for keypresses,
        refreshing the queue from the DB every 4th iteration."""
        os.environ['BIBSCHED_MODE'] = 'manual'
        if self.curses.has_colors():
            self.curses.start_color()
            self.curses.init_pair(8, self.curses.COLOR_WHITE, self.curses.COLOR_BLACK)
            self.curses.init_pair(1, self.curses.COLOR_WHITE, self.curses.COLOR_RED)
            self.curses.init_pair(2, self.curses.COLOR_GREEN, self.curses.COLOR_BLACK)
            self.curses.init_pair(3, self.curses.COLOR_MAGENTA, self.curses.COLOR_BLACK)
            self.curses.init_pair(4, self.curses.COLOR_RED, self.curses.COLOR_BLACK)
            self.curses.init_pair(5, self.curses.COLOR_BLUE, self.curses.COLOR_BLACK)
            self.curses.init_pair(6, self.curses.COLOR_CYAN, self.curses.COLOR_BLACK)
            self.curses.init_pair(7, self.curses.COLOR_YELLOW, self.curses.COLOR_BLACK)
        self.stdscr = stdscr
        self.base_panel = self.curses.panel.new_panel(self.stdscr)
        self.base_panel.bottom()
        self.curses.panel.update_panels()
        self.height, self.width = stdscr.getmaxyx()
        self.stdscr.erase()
        if server_pid():
            self.auto_mode = 1
        # Start at 4 so the first iteration refreshes immediately.
        ring = 4
        if len(self.motd) > 0:
            self._display_message_box(self.motd + "\nPress any key to close")
        while self.running:
            if ring == 4:
                self.read_motd()
                self.update_rows()
                ring = 0
                self.repaint()
            ring += 1
            char = -1
            try:
                # getch with a 1s timeout; manual decoding of common ESC
                # sequences for terminals where keypad translation fails.
                char = timed_out(self.stdscr.getch, 1)
                if char == 27: # escaping sequence
                    char = self.stdscr.getch()
                    if char == 79: # arrow
                        char = self.stdscr.getch()
                        if char == 65: # arrow up
                            char = self.curses.KEY_UP
                        elif char == 66: # arrow down
                            char = self.curses.KEY_DOWN
                        elif char == 72:
                            char = self.curses.KEY_PPAGE
                        elif char == 70:
                            char = self.curses.KEY_NPAGE
                    elif char == 91:
                        char = self.stdscr.getch()
                        if char == 53:
                            char = self.stdscr.getch()
                            if char == 126:
                                char = self.curses.KEY_HOME
            except TimedOutExc:
                char = -1
            self.handle_keys(char)
class BibSched(object):
    def __init__(self, debug=False):
        """Initialise the scheduler state for this node.

        :param debug: when True, scheduling decisions are logged verbosely.
        """
        self.debug = debug
        self.hostname = gethostname()
        self.helper_modules = CFG_BIBTASK_VALID_TASKS
        ## All the tasks in the queue that the node is allowed to manipulate
        self.node_relevant_bibupload_tasks = ()
        self.node_relevant_waiting_tasks = ()
        self.node_relevant_active_tasks = ()
        ## All tasks of all nodes
        self.active_tasks_all_nodes = ()
        self.mono_tasks_all_nodes = ()
        # Task types this node may run; falls back to all valid tasks when
        # the host has no specific configuration.
        self.allowed_task_types = CFG_BIBSCHED_NODE_TASKS.get(self.hostname, CFG_BIBTASK_VALID_TASKS)
        os.environ['BIBSCHED_MODE'] = 'automatic'
    def tie_task_to_host(self, task_id):
        """Sets the hostname of a task to the machine executing this script
        @return: True if the scheduling was successful, False otherwise,
        e.g. if the task was scheduled concurrently on a different host.
        """
        # Three-step claim protocol; the statement order is the concurrency
        # guarantee and must not be changed:
        # 1) cheap pre-check that the task is still unclaimed,
        if not run_sql("""SELECT id FROM schTASK WHERE id=%s AND host=''
                          AND status='WAITING'""", (task_id, )):
            ## The task was already tied?
            return False
        # 2) conditional update that only succeeds for the unclaimed row,
        run_sql("""UPDATE schTASK SET host=%s, status='SCHEDULED'
                   WHERE id=%s AND host='' AND status='WAITING'""",
                (self.hostname, task_id))
        # 3) re-read to confirm that it was *this* host that won the race.
        return bool(run_sql("SELECT id FROM schTASK WHERE id=%s AND host=%s",
                            (task_id, self.hostname)))
def filter_for_allowed_tasks(self):
""" Removes all tasks that are not allowed in this Invenio instance
"""
def relevant_task(task_id, proc, runtime, status, priority, host, sequenceid): # pylint: disable=W0613
# if host and self.hostname != host:
# return False
procname = proc.split(':')[0]
if procname not in self.allowed_task_types:
return False
return True
def filter_tasks(tasks):
return tuple(t for t in tasks if relevant_task(*t))
self.node_relevant_bibupload_tasks = filter_tasks(self.node_relevant_bibupload_tasks)
self.node_relevant_active_tasks = filter_tasks(self.node_relevant_active_tasks)
self.node_relevant_waiting_tasks = filter_tasks(self.node_relevant_waiting_tasks)
self.node_relevant_sleeping_tasks = filter_tasks(self.node_relevant_sleeping_tasks)
def is_task_safe_to_execute(self, proc1, proc2):
"""Return True when the two tasks can run concurrently."""
return proc1 != proc2 # and not proc1.startswith('bibupload') and not proc2.startswith('bibupload')
def get_tasks_to_sleep_and_stop(self, proc, task_set):
"""Among the task_set, return the list of tasks to stop and the list
of tasks to sleep.
"""
if proc in CFG_BIBTASK_MONOTASKS:
return [], [t for t in task_set
if t[3] not in ('SLEEPING', 'ABOUT TO SLEEP')]
min_prio = None
min_task_id = None
min_proc = None
min_status = None
min_sequenceid = None
to_stop = []
## For all the lower priority tasks...
for (this_task_id, this_proc, this_priority, this_status, this_sequenceid) in task_set:
if not self.is_task_safe_to_execute(this_proc, proc):
to_stop.append((this_task_id, this_proc, this_priority, this_status, this_sequenceid))
elif (min_prio is None or this_priority < min_prio) and \
this_status not in ('SLEEPING', 'ABOUT TO SLEEP'):
## We don't put to sleep already sleeping task :-)
min_prio = this_priority
min_task_id = this_task_id
min_proc = this_proc
min_status = this_status
min_sequenceid = this_sequenceid
if to_stop:
return to_stop, []
elif min_task_id:
return [], [(min_task_id, min_proc, min_prio, min_status, min_sequenceid)]
else:
return [], []
def split_active_tasks_by_priority(self, task_id, priority):
"""Return two lists: the list of task_ids with lower priority and
those with higher or equal priority."""
higher = []
lower = []
### !!! We already have this in node_relevant_active_tasks
for other_task_id, task_proc, dummy, status, task_priority, task_host, sequenceid in self.node_relevant_active_tasks:
# for other_task_id, task_proc, runtime, status, task_priority, task_host in self.node_relevant_active_tasks:
# for other_task_id, task_proc, task_priority, status in self.get_running_tasks():
if task_id == other_task_id:
continue
if task_priority < priority and task_host == self.hostname:
lower.append((other_task_id, task_proc, task_priority, status, sequenceid))
elif task_host == self.hostname:
higher.append((other_task_id, task_proc, task_priority, status, sequenceid))
return lower, higher
def handle_task(self, task_id, proc, runtime, status, priority, host, sequenceid):
    """Perform needed action of the row representing a task.

    Return True when task_status need to be refreshed (i.e. this call
    changed the schTASK table or started/woke/stopped a task), False
    when the scheduler may keep iterating over the current snapshot.
    """
    debug = self.debug
    if debug:
        Log("task_id: %s, proc: %s, runtime: %s, status: %s, priority: %s, host: %s, sequenceid: %s" %
            (task_id, proc, runtime, status, priority, host, sequenceid))
    if (task_id, proc, runtime, status, priority, host, sequenceid) in self.node_relevant_active_tasks:
        # For multi-node
        # check if we need to sleep ourselves for monotasks to be able to run
        for other_task_id, other_proc, dummy_other_runtime, other_status, other_priority, other_host, other_sequenceid in self.mono_tasks_all_nodes:
            if priority < other_priority:
                # Sleep ourselves
                if status not in ('SLEEPING', 'ABOUT TO SLEEP'):
                    sleep_task(task_id, proc, priority, status, sequenceid)
                    return True
        return False
    elif (task_id, proc, runtime, status, priority, host, sequenceid) in self.node_relevant_waiting_tasks:
        if debug:
            Log("Trying to run %s" % task_id)
        if priority < -10:
            # Tasks below priority -10 are effectively disabled.
            if debug:
                Log("Cannot run because priority < -10")
            return False
        lower, higher = self.split_active_tasks_by_priority(task_id, priority)
        if debug:
            Log('lower: %s' % lower)
            Log('higher: %s' % higher)
        # Any sleeping or active task (on any node) that is incompatible
        # with this one blocks it from starting.
        for other_task_id, other_proc, dummy_other_runtime, other_status, \
                other_priority, other_host, other_sequenceid in chain(
                    self.node_relevant_sleeping_tasks,
                    self.active_tasks_all_nodes):
            if task_id != other_task_id and \
                    not self.is_task_safe_to_execute(proc, other_proc):
                ### !!! WE NEED TO CHECK FOR TASKS THAT CAN ONLY BE EXECUTED ON ONE MACHINE AT ONE TIME
                ### !!! FOR EXAMPLE BIBUPLOADS WHICH NEED TO BE EXECUTED SEQUENTIALLY AND NEVER CONCURRENTLY
                ## There's at least a higher priority task running that
                ## cannot run at the same time of the given task.
                ## We give up
                if debug:
                    Log("Cannot run because task_id: %s, proc: %s is in the queue and incompatible" % (other_task_id, other_proc))
                return False
        if sequenceid:
            ## Let's normalize the prority of all tasks in a sequenceid to the
            ## max priority of the group
            max_priority = run_sql("""SELECT MAX(priority) FROM schTASK
                                      WHERE status='WAITING'
                                      AND sequenceid=%s""",
                                   (sequenceid, ))[0][0]
            if run_sql("""UPDATE schTASK SET priority=%s
                          WHERE status='WAITING' AND sequenceid=%s""",
                       (max_priority, sequenceid)):
                Log("Raised all waiting tasks with sequenceid "
                    "%s to the max priority %s" % (sequenceid, max_priority))
                ## Some priorities where raised
                return True
            ## Let's normalize the runtime of all tasks in a sequenceid to
            ## the compatible runtime.
            current_runtimes = run_sql("""SELECT id, runtime FROM schTASK WHERE sequenceid=%s AND status='WAITING' ORDER by id""", (sequenceid, ))
            runtimes_adjusted = False
            if current_runtimes:
                last_runtime = current_runtimes[0][1]
                for the_task_id, runtime in current_runtimes:
                    if runtime < last_runtime:
                        # A later task in the sequence is scheduled earlier
                        # than a previous one: push its runtime forward.
                        run_sql("""UPDATE schTASK SET runtime=%s WHERE id=%s""", (last_runtime, the_task_id))
                        if debug:
                            Log("Adjusted runtime of task_id %s to %s in order to be executed in the correct sequenceid order" % (the_task_id, last_runtime))
                        runtimes_adjusted = True
                        runtime = last_runtime
                    last_runtime = runtime
            if runtimes_adjusted:
                ## Some runtime have been adjusted
                return True
        if sequenceid is not None:
            # An earlier task of the same sequence is still active:
            # this one must wait its turn.
            for other_task_id, dummy_other_proc, dummy_other_runtime, dummy_other_status, dummy_other_priority, dummy_other_host, other_sequenceid in self.active_tasks_all_nodes:
                if sequenceid == other_sequenceid and task_id > other_task_id:
                    Log('Task %s need to run after task %s since they have the same sequence id: %s' % (task_id, other_task_id, sequenceid))
                    ## If there is a task with same sequence number then do not run the current task
                    return False
        if proc in CFG_BIBTASK_MONOTASKS and higher:
            ## This is a monotask
            if debug:
                Log("Cannot run because this is a monotask and there are higher priority tasks: %s" % (higher, ))
            return False
        ## No higher priority task have issue with the given task.
        if proc not in CFG_BIBTASK_FIXEDTIMETASKS and len(higher) >= CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS:
            if debug:
                Log("Cannot run because all resources (%s) are used (%s), higher: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(higher), higher))
            return False
        ## Check for monotasks wanting to run
        for other_task_id, other_proc, dummy_other_runtime, other_status, other_priority, other_host, other_sequenceid in self.mono_tasks_all_nodes:
            if priority < other_priority:
                if debug:
                    Log("Cannot run because there is a monotask with higher priority: %s %s" % (other_task_id, other_proc))
                return False
        ## We check if it is necessary to stop/put to sleep some lower priority
        ## task.
        tasks_to_stop, tasks_to_sleep = self.get_tasks_to_sleep_and_stop(proc, lower)
        if debug:
            Log('tasks_to_stop: %s' % tasks_to_stop)
            Log('tasks_to_sleep: %s' % tasks_to_sleep)
        if tasks_to_stop and priority < 100:
            ## Only tasks with priority higher than 100 have the power
            ## to put task to stop.
            if debug:
                Log("Cannot run because there are task to stop: %s and priority < 100" % tasks_to_stop)
            return False
        procname = proc.split(':')[0]
        if not tasks_to_stop and (not tasks_to_sleep or (proc not in CFG_BIBTASK_MONOTASKS and len(self.node_relevant_active_tasks) < CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS)):
            if proc in CFG_BIBTASK_MONOTASKS and self.active_tasks_all_nodes:
                if debug:
                    Log("Cannot run because this is a monotask and there are other tasks running: %s" % (self.node_relevant_active_tasks, ))
                return False
            # NOTE: the `host` parameter here deliberately shadows the outer
            # `host` argument -- it refers to the host column of each row.
            def task_in_same_host(dummy_task_id, dummy_proc, dummy_runtime, dummy_status, dummy_priority, host, dummy_sequenceid):
                return host == self.hostname
            def filter_by_host(tasks):
                return tuple(t for t in tasks if task_in_same_host(*t))
            node_active_tasks = filter_by_host(self.node_relevant_active_tasks)
            if len(node_active_tasks) >= CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS:
                if debug:
                    Log("Cannot run because all resources (%s) are used (%s), active: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(node_active_tasks), node_active_tasks))
                return False
            if status in ("SLEEPING", "ABOUT TO SLEEP"):
                if host == self.hostname:
                    ## We can only wake up tasks that are running on our own host
                    for other_task_id, other_proc, dummy_other_runtime, other_status, dummy_other_priority, other_host, dummy_other_sequenceid in self.node_relevant_active_tasks:
                        ## But only if there are not other tasks still going to sleep, otherwise
                        ## we might end up stealing the slot for an higher priority task.
                        if other_task_id != task_id and other_status in ('ABOUT TO SLEEP', 'ABOUT TO STOP') and other_host == self.hostname:
                            if debug:
                                Log("Not yet waking up task #%d since there are other tasks (%s #%d) going to sleep (higher priority task incoming?)" % (task_id, other_proc, other_task_id))
                            return False
                    bibsched_set_status(task_id, "CONTINUING", status)
                    if not bibsched_send_signal(proc, task_id, signal.SIGCONT):
                        # The process vanished while sleeping: flag the row.
                        bibsched_set_status(task_id, "ERROR", "CONTINUING")
                        Log("Task #%d (%s) woken up but didn't existed anymore" % (task_id, proc))
                        return True
                    Log("Task #%d (%s) woken up" % (task_id, proc))
                    return True
                else:
                    return False
            elif procname in self.helper_modules:
                program = os.path.join(CFG_BINDIR, procname)
                ## Trick to log in bibsched.log the task exiting
                exit_str = '&& echo "`date "+%%Y-%%m-%%d %%H:%%M:%%S"` --> Task #%d (%s) exited" >> %s' % (task_id, proc, os.path.join(CFG_LOGDIR, 'bibsched.log'))
                command = "%s %s %s" % (program, str(task_id), exit_str)
                ### Set the task to scheduled and tie it to this host
                if self.tie_task_to_host(task_id):
                    Log("Task #%d (%s) started" % (task_id, proc))
                    ### Relief the lock for the BibTask, it is safe now to do so
                    spawn_task(command, wait=proc in CFG_BIBTASK_MONOTASKS)
                    count = 10
                    while run_sql("""SELECT status FROM schTASK
                                     WHERE id=%s AND status='SCHEDULED'""",
                                  (task_id, )):
                        ## Polling to wait for the task to really start,
                        ## in order to avoid race conditions.
                        if count <= 0:
                            raise StandardError("Process %s (task_id: %s) was launched but seems not to be able to reach RUNNING status." % (proc, task_id))
                        time.sleep(CFG_BIBSCHED_REFRESHTIME)
                        count -= 1
                return True
            else:
                raise StandardError("%s is not in the allowed modules" % procname)
        else:
            ## It's not still safe to run the task.
            ## We first need to stop tasks that should be stopped
            ## and to put to sleep tasks that should be put to sleep
            for t in tasks_to_stop:
                stop_task(*t)
            for t in tasks_to_sleep:
                sleep_task(*t)
            time.sleep(CFG_BIBSCHED_REFRESHTIME)
            return True
def check_errors(self):
    """Collect tasks in an error state and raise a summary exception.

    Matching tasks are marked 'ERRORS REPORTED' so they are only reported
    once.  Raises RecoverableError when every failure was a 'CERROR',
    StandardError otherwise; returns silently when there is nothing new.
    """
    errors = run_sql("""SELECT id,proc,status FROM schTASK
                        WHERE status = 'ERROR'
                        OR status = 'DONE WITH ERRORS'
                        OR status = 'CERROR'""")
    if not errors:
        return
    error_msgs = []
    error_recoverable = True
    for e_id, e_proc, e_status in errors:
        updated = run_sql("""UPDATE schTASK
                             SET status='ERRORS REPORTED'
                             WHERE id = %s AND (status='CERROR'
                             OR status='ERROR'
                             OR status='DONE WITH ERRORS')""", [e_id])
        if not updated:
            # Someone else already flipped this row: skip the duplicate report.
            continue
        error_msgs.append(" #%s %s -> %s" % (e_id, e_proc, e_status))
        if e_status in ('ERROR', 'DONE WITH ERRORS'):
            error_recoverable = False
    if not error_msgs:
        return
    msg = "BibTask with ERRORS:\n%s" % '\n'.join(error_msgs)
    if error_recoverable:
        raise RecoverableError(msg)
    raise StandardError(msg)
def calculate_rows(self):
    """Refresh the cached task lists this scheduling cycle works on.

    Fills node_relevant_bibupload_tasks, node_relevant_waiting_tasks,
    node_relevant_sleeping_tasks and node_relevant_active_tasks (plus
    the all-nodes snapshots) from schTASK, then filters them for this
    host via filter_for_allowed_tasks().
    """
    # Report failed tasks first; a recoverable (CERROR-only) failure is
    # downgraded to a "light" emergency instead of propagating.
    try:
        self.check_errors()
    except RecoverableError, msg:
        register_emergency('Light emergency from %s: BibTask failed: %s' % (CFG_SITE_URL, msg))
    max_bibupload_priority, min_bibupload_priority = run_sql(
        """SELECT MAX(priority), MIN(priority)
        FROM schTASK
        WHERE status IN ('WAITING', 'RUNNING', 'SLEEPING',
        'ABOUT TO STOP', 'ABOUT TO SLEEP',
        'SCHEDULED', 'CONTINUING')
        AND proc = 'bibupload'
        AND runtime <= NOW()""")[0]
    if max_bibupload_priority > min_bibupload_priority:
        # Level every runnable bibupload to the highest priority among
        # them, so priority can never reorder bibuploads (they must run
        # strictly in scheduling/id order).
        run_sql(
            """UPDATE schTASK SET priority = %s
            WHERE status IN ('WAITING', 'RUNNING', 'SLEEPING',
            'ABOUT TO STOP', 'ABOUT TO SLEEP',
            'SCHEDULED', 'CONTINUING')
            AND proc = 'bibupload'
            AND runtime <= NOW()
            AND priority < %s""", (max_bibupload_priority,
                                   max_bibupload_priority))
    ## The bibupload tasks are sorted by id, which means by the order they were scheduled
    self.node_relevant_bibupload_tasks = run_sql(
        """SELECT id, proc, runtime, status, priority, host, sequenceid
        FROM schTASK WHERE status IN ('WAITING', 'SLEEPING')
        AND proc = 'bibupload'
        AND runtime <= NOW()
        ORDER BY id ASC LIMIT 1""", n=1)
    ## The other tasks are sorted by priority
    self.node_relevant_waiting_tasks = run_sql(
        """SELECT id, proc, runtime, status, priority, host, sequenceid
        FROM schTASK WHERE (status='WAITING' AND runtime <= NOW())
        OR status = 'SLEEPING'
        ORDER BY priority DESC, runtime ASC, id ASC""")
    self.node_relevant_sleeping_tasks = run_sql(
        """SELECT id, proc, runtime, status, priority, host, sequenceid
        FROM schTASK WHERE status = 'SLEEPING'
        ORDER BY priority DESC, runtime ASC, id ASC""")
    self.node_relevant_active_tasks = run_sql(
        """SELECT id, proc, runtime, status, priority, host, sequenceid
        FROM schTASK WHERE status IN ('RUNNING', 'CONTINUING',
        'SCHEDULED', 'ABOUT TO STOP',
        'ABOUT TO SLEEP')""")
    # Keep cluster-wide snapshots before the per-host filtering below.
    self.active_tasks_all_nodes = tuple(self.node_relevant_active_tasks)
    self.mono_tasks_all_nodes = tuple(t for t in self.node_relevant_waiting_tasks if is_monotask(*t))
    ## Remove tasks that can not be executed on this host
    self.filter_for_allowed_tasks()
def watch_loop(self):
    """Endless scheduling loop: refresh the queue, then dispatch tasks.

    Any unexpected exception is reported (exception log plus emergency
    notification) and re-raised, which terminates the daemon.
    """
    ## Cleaning up scheduled task not run because of bibsched being
    ## interrupted in the middle.
    run_sql("""UPDATE schTASK
            SET status = 'WAITING'
            WHERE status = 'SCHEDULED'
            AND host = %s""", (self.hostname, ))
    try:
        while True:
            if self.debug:
                Log("New bibsched cycle")
            self.calculate_rows()
            ## Let's first handle running node_relevant_active_tasks.
            for task in self.node_relevant_active_tasks:
                if self.handle_task(*task):
                    # The task table changed: restart the cycle on fresh
                    # rows instead of acting on a stale snapshot.
                    break
            else:
                # If nothing has changed we can go on to run tasks.
                for task in self.node_relevant_waiting_tasks:
                    if task[1] == 'bibupload' and self.node_relevant_bibupload_tasks:
                        ## We switch in bibupload serial mode!
                        ## which means we execute the first next bibupload.
                        if self.handle_task(*self.node_relevant_bibupload_tasks[0]):
                            ## Something has changed
                            break
                    elif self.handle_task(*task):
                        ## Something has changed
                        break
                else:
                    # Nothing at all to do: wait before polling again.
                    time.sleep(CFG_BIBSCHED_REFRESHTIME)
    except Exception, err:
        register_exception(alert_admin=True)
        try:
            register_emergency('Emergency from %s: BibSched halted: %s' % (CFG_SITE_URL, err))
        except NotImplementedError:
            pass
        raise
class TimedOutExc(Exception):
    """Raised when a call guarded by timed_out() exceeds its time budget."""

    def __init__(self, value="Timed Out"):
        Exception.__init__(self)
        # Human-readable reason, surfaced through __str__.
        self.value = value

    def __str__(self):
        return "%r" % (self.value,)
def timed_out(f, timeout, *args, **kwargs):
    """Call ``f(*args, **kwargs)``, raising TimedOutExc after *timeout* seconds.

    Implemented with SIGALRM, so it is POSIX/main-thread only; the
    previously installed SIGALRM handler is always restored and the
    alarm disarmed, whether the call succeeds or not.
    """
    def _on_alarm(signum, frame):  # pylint: disable=W0613
        raise TimedOutExc()

    previous_handler = signal.signal(signal.SIGALRM, _on_alarm)
    signal.alarm(timeout)
    try:
        return f(*args, **kwargs)
    finally:
        # Restore the old handler first, then disarm any pending alarm.
        signal.signal(signal.SIGALRM, previous_handler)
        signal.alarm(0)
def Log(message):
    """Append *message* to bibsched.log, prefixed with a local timestamp.

    Each entry has the form ``YYYY-mm-dd HH:MM:SS --> <message>`` followed
    by a newline.
    """
    # 'with' guarantees the handle is closed even when a write fails
    # (the previous open()/close() pair leaked the descriptor on error).
    with open(os.path.join(CFG_LOGDIR, "bibsched.log"), "a") as log:
        log.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
        log.write(message)
        log.write("\n")
def redirect_stdout_and_stderr():
    """Divert stdout to bibsched.log and stderr to bibsched.err.

    Returns the previous (stdout, stderr) pair so the caller can hand it
    to restore_stdout_and_stderr() later.
    """
    previous = (sys.stdout, sys.stderr)
    sys.stdout = open(CFG_LOGDIR + "/bibsched.log", "a")
    sys.stderr = open(CFG_LOGDIR + "/bibsched.err", "a")
    return previous
def restore_stdout_and_stderr(stdout, stderr):
    """Re-install the (stdout, stderr) pair saved by redirect_stdout_and_stderr()."""
    sys.stdout, sys.stderr = stdout, stderr
def usage(exitcode=1, msg=""):
    """Print usage info (preceded by an optional error message) and exit.

    :param exitcode: process exit status (0 when invoked for --help).
    :param msg: optional error text written to stderr before the help.
    """
    if msg:
        sys.stderr.write("Error: %s.\n" % msg)
    # FIX: the synopsis line now lists 'halt' and 'purge' too -- both were
    # documented below and dispatched by main(), but missing from the list.
    sys.stderr.write("""\
Usage: %s [options] [start|stop|halt|restart|monitor|status|purge]

The following commands are available for bibsched:

   start      start bibsched in background
   stop       stop running bibtasks and the bibsched daemon safely
   halt       halt running bibsched while keeping bibtasks running
   restart    restart running bibsched
   monitor    enter the interactive monitor
   status     get report about current status of the queue
   purge      purge the scheduler queue from old tasks

General options:
  -h, --help \t Print this help.
  -V, --version \t Print version information.
  -q, --quiet \t Quiet mode
  -d, --debug \t Write debugging information in bibsched.log

Status options:
  -s, --status=LIST\t Which BibTask status should be considered (default is Running,waiting)
  -S, --since=TIME\t Since how long time to consider tasks e.g.: 30m, 2h, 1d (default
is all)
  -t, --tasks=LIST\t Comma separated list of BibTask to consider (default
\t is all)

Purge options:
  -s, --status=LIST\t Which BibTask status should be considered (default is DONE)
  -S, --since=TIME\t Since how long time to consider tasks e.g.: 30m, 2h, 1d (default
is %s days)
  -t, --tasks=LIST\t Comma separated list of BibTask to consider (default
\t is %s)

""" % (sys.argv[0], CFG_BIBSCHED_GC_TASKS_OLDER_THAN, ','.join(CFG_BIBSCHED_GC_TASKS_TO_REMOVE + CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE)))
    sys.exit(exitcode)
# Path of the file recording the daemon's PID; written by start() and
# consulted by server_pid()/halt().
pidfile = os.path.join(CFG_PREFIX, 'var', 'run', 'bibsched.pid')
def error(msg):
    """Print ``error: msg`` to stderr and terminate with exit status 1."""
    print >> sys.stderr, "error: %s" % msg
    sys.exit(1)
def warning(msg):
    """Print ``warning: msg`` to stderr; execution continues."""
    print >> sys.stderr, "warning: %s" % msg
def server_pid(ping_the_process=True, check_is_really_bibsched=True):
    """Return the PID of the running bibsched daemon, or None.

    The PID is read from the pidfile.  Optionally verify that the process
    is alive (signal 0 is not used; a harmless SIGCONT ping is) and that
    its command line really is a bibsched process.
    """
    # The pid must be stored on the filesystem
    try:
        pid = int(open(pidfile).read())
    except IOError:
        # No pidfile: no daemon was ever started (or it cleaned up).
        return None
    if ping_the_process:
        # Even if the pid is available, we check if it corresponds to an
        # actual process, as it might have been killed externally
        try:
            os.kill(pid, signal.SIGCONT)
        except OSError:
            warning("pidfile %s found referring to pid %s which is not running" % (pidfile, pid))
            return None
    if check_is_really_bibsched:
        ps_output = run_shell_command("ps p %s -o args=", (str(pid), ))[1]
        if 'bibsched' not in ps_output:
            warning("pidfile %s found referring to pid %s which does not correspond to bibsched: cmdline is %s" % (pidfile, pid, ps_output))
            return None
    return pid
def start(verbose=True, debug=False):
    """ Fork this process in the background and start processing
    requests. The process PID is stored in a pid file, so that it can
    be stopped later on."""
    if verbose:
        sys.stdout.write("starting bibsched: ")
        sys.stdout.flush()
    # Refuse to start twice: a pidfile backed by a live process means
    # another daemon already owns the queue.
    pid = server_pid(ping_the_process=False)
    if pid:
        pid2 = server_pid()
        if pid2:
            error("another instance of bibsched (pid %d) is running" % pid2)
        else:
            # Stale pidfile left behind by a crashed daemon: remove it.
            warning("%s exist but the corresponding bibsched (pid %s) seems not be running" % (pidfile, pid))
            warning("erasing %s and continuing..." % (pidfile, ))
            os.remove(pidfile)
    # start the child process using the "double fork" technique
    pid = os.fork()
    if pid > 0:
        # Original parent: its job is done.
        sys.exit(0)
    os.setsid()
    os.chdir('/')
    pid = os.fork()
    if pid > 0:
        # Intermediate child: record the daemon's pid, then return so the
        # surrounding main() can finish normally.
        if verbose:
            sys.stdout.write('pid %d\n' % pid)
        Log("daemon started (pid %d)" % pid)
        open(pidfile, 'w').write('%d' % pid)
        return
    # From here on we are the daemon (grandchild) process.
    sys.stdin.close()
    redirect_stdout_and_stderr()
    sched = BibSched(debug=debug)
    try:
        sched.watch_loop()
    finally:
        # Always drop the pidfile when the daemon dies, however it dies.
        try:
            os.remove(pidfile)
        except OSError:
            pass
def halt(verbose=True, soft=False, debug=False):  # pylint: disable=W0613
    """Kill the bibsched daemon; running BibTasks are left untouched.

    With soft=True a missing daemon is only reported and the function
    returns; otherwise it is a fatal error (exit status 1).
    """
    pid = server_pid()
    if not pid:
        if soft:
            print >> sys.stderr, 'bibsched seems not to be running.'
            return
        else:
            error('bibsched seems not to be running.')
    try:
        # SIGKILL is fine here: the daemon keeps no state needing a
        # graceful shutdown (tasks run in separate processes).
        os.kill(pid, signal.SIGKILL)
    except OSError:
        print >> sys.stderr, 'no bibsched process found'
    Log("daemon stopped (pid %d)" % pid)
    if verbose:
        print "stopping bibsched: pid %d" % pid
    os.unlink(pidfile)
def monitor(verbose=True, debug=False):  # pylint: disable=W0613
    """Run the interactive bibsched monitor.

    Console output is diverted to the log files while the Manager owns
    the terminal, and restored afterwards no matter how it exits.
    """
    saved_out, saved_err = redirect_stdout_and_stderr()
    try:
        Manager(saved_out)
    finally:
        restore_stdout_and_stderr(saved_out, saved_err)
def write_message(msg, stream=None, verbose=1):  # pylint: disable=W0613
    """Write message and flush output stream (may be sys.stdout or sys.stderr).
    Useful for debugging stuff.

    Empty messages are silently dropped; any stream other than
    sys.stdout/sys.stderr is reported as an error on stderr.
    """
    if stream is None:
        stream = sys.stdout
    if not msg:
        return
    if stream not in (sys.stdout, sys.stderr):
        sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream)
        return
    stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
    try:
        stream.write("%s\n" % msg)
    except UnicodeEncodeError:
        # Fall back to a 7-bit-safe rendering of the message.
        stream.write("%s\n" % msg.encode('ascii', 'backslashreplace'))
    stream.flush()
def report_queue_status(verbose=True, status=None, since=None, tasks=None):  # pylint: disable=W0613
    """
    Report about the current status of BibSched queue on standard output.

    :param status: list of status names to report on, or None for the
        default 'Running' and 'Waiting' sections.
    :param since: time offset string (e.g. '2h', '1d') limiting the report.
    :param tasks: list of task (proc) names to restrict the report to.
    """
    def report_about_processes(status='RUNNING', since=None, tasks=None):
        """
        Helper function to report about processes with the given status.
        """
        if tasks is None:
            task_query = ''
        else:
            # Task names are escaped and inlined into the SQL text (they
            # cannot be bound parameters inside the IN (...) list here).
            task_query = 'AND proc IN (%s)' % (
                ','.join([repr(real_escape_string(task)) for task in tasks]))
        if since is None:
            since_query = ''
        else:
            # We're not interested in future task
            if since.startswith('+') or since.startswith('-'):
                since = since[1:]
            since = '-' + since
            since_query = "AND runtime >= '%s'" % get_datetime(since)
        # %%s survives the dict interpolation below and becomes the bound
        # %s placeholder for the status value.
        res = run_sql("""SELECT id, proc, user, runtime, sleeptime,
                      status, progress, priority
                      FROM schTASK WHERE status=%%s %(task_query)s
                      %(since_query)s ORDER BY id ASC""" % {
                          'task_query': task_query,
                          'since_query' : since_query},
                      (status,))
        write_message("%s processes: %d" % (status, len(res)))
        for (proc_id, proc_proc, proc_user, proc_runtime, proc_sleeptime,
             proc_status, proc_progress, proc_priority) in res:
            write_message(' * ID="%s" PRIORITY="%s" PROC="%s" USER="%s" '
                          'RUNTIME="%s" SLEEPTIME="%s" STATUS="%s" '
                          'PROGRESS="%s"' % (proc_id,
                          proc_priority, proc_proc, proc_user, proc_runtime,
                          proc_sleeptime, proc_status, proc_progress))
        return
    write_message("BibSched queue status report for %s:" % gethostname())
    # AUTOMATIC when the daemon is running, MANUAL otherwise.
    mode = server_pid() and "AUTOMATIC" or "MANUAL"
    write_message("BibSched queue running mode: %s" % mode)
    if status is None:
        report_about_processes('Running', since, tasks)
        report_about_processes('Waiting', since, tasks)
    else:
        for state in status:
            report_about_processes(state, since, tasks)
    write_message("Done.")
def restart(verbose=True, debug=False):
    """Stop any running bibsched daemon (tolerating its absence), then start a new one."""
    halt(verbose=verbose, soft=True, debug=debug)
    start(verbose=verbose, debug=debug)
def stop(verbose=True, debug=False):
    """
    * Stop bibsched
    * Send stop signal to all the running tasks
    * wait for all the tasks to stop
    * return
    """
    if verbose:
        print "Stopping BibSched if running"
    halt(verbose, soft=True, debug=debug)
    # The daemon is gone, so SCHEDULED rows will never start: requeue them.
    run_sql("UPDATE schTASK SET status='WAITING' WHERE status='SCHEDULED'")
    res = run_sql("""SELECT id, proc, status FROM schTASK
                  WHERE status NOT LIKE 'DONE'
                  AND status NOT LIKE '%_DELETED'
                  AND (status='RUNNING'
                  OR status='ABOUT TO STOP'
                  OR status='ABOUT TO SLEEP'
                  OR status='SLEEPING'
                  OR status='CONTINUING')""")
    if verbose:
        print "Stopping all running BibTasks"
    for task_id, proc, status in res:
        if status == 'SLEEPING':
            # A sleeping task cannot react to the stop request: wake it
            # up first, then give it a moment to resume.
            bibsched_send_signal(proc, task_id, signal.SIGCONT)
            time.sleep(CFG_BIBSCHED_REFRESHTIME)
        bibsched_set_status(task_id, 'ABOUT TO STOP')
    # Poll until every task has acknowledged the stop request.
    while run_sql("""SELECT id FROM schTASK
                  WHERE status NOT LIKE 'DONE'
                  AND status NOT LIKE '%_DELETED'
                  AND (status='RUNNING'
                  OR status='ABOUT TO STOP'
                  OR status='ABOUT TO SLEEP'
                  OR status='SLEEPING'
                  OR status='CONTINUING')"""):
        if verbose:
            sys.stdout.write('.')
            sys.stdout.flush()
        time.sleep(CFG_BIBSCHED_REFRESHTIME)
    if verbose:
        print "\nStopped"
    Log("BibSched and all BibTasks stopped")
def main():
    """Command-line entry point: parse options and dispatch the command."""
    from invenio.bibtask import check_running_process_user
    check_running_process_user()
    verbose = True
    status = None
    since = None
    tasks = None
    debug = False
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "hVdqS:s:t:", [
            "help", "version", "debug", "quiet", "since=", "status=", "task="])
    except getopt.GetoptError, err:
        Log("Error: %s" % err)
        usage(1, err)
    for opt, arg in opts:
        if opt in ["-h", "--help"]:
            usage(0)
        elif opt in ["-V", "--version"]:
            print __revision__
            sys.exit(0)
        elif opt in ['-q', '--quiet']:
            verbose = False
        elif opt in ['-s', '--status']:
            # Comma-separated list of status names.
            status = arg.split(',')
        elif opt in ['-S', '--since']:
            since = arg
        elif opt in ['-t', '--task']:
            # Comma-separated list of task (proc) names.
            tasks = arg.split(',')
        elif opt in ['-d', '--debug']:
            debug = True
        else:
            usage(1)
    try:
        cmd = args[0]
    except IndexError:
        # No command given: default to the interactive monitor.
        cmd = 'monitor'
    try:
        if cmd in ('status', 'purge'):
            # These two commands take the filtering options positionally.
            {'status' : report_queue_status,
             'purge' : gc_tasks}[cmd](verbose, status, since, tasks)
        else:
            {'start': start,
             'halt': halt,
             'stop': stop,
             'restart': restart,
             'monitor': monitor}[cmd](verbose=verbose, debug=debug)
    except KeyError:
        usage(1, 'unkown command: %s' % cmd)
# Script entry point: run the CLI dispatcher when executed directly.
if __name__ == '__main__':
    main()
diff --git a/invenio/legacy/bibupload/engine.py b/invenio/legacy/bibupload/engine.py
index 51c5e142f..73aef2ca4 100644
--- a/invenio/legacy/bibupload/engine.py
+++ b/invenio/legacy/bibupload/engine.py
@@ -1,2937 +1,2937 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibUpload: Receive MARC XML file and update the appropriate database
tables according to options.
"""
__revision__ = "$Id$"
import os
import re
import sys
import time
from datetime import datetime
from zlib import compress
import socket
import marshal
import copy
import tempfile
import urlparse
import urllib2
import urllib
from invenio.config import CFG_OAI_ID_FIELD, \
CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \
CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \
CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG, \
CFG_BIBUPLOAD_STRONG_TAGS, \
CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS, \
CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \
CFG_BIBUPLOAD_DELETE_FORMATS, \
CFG_SITE_URL, CFG_SITE_SECURE_URL, CFG_SITE_RECORD, \
CFG_OAI_PROVENANCE_ALTERED_SUBFIELD, \
CFG_BIBUPLOAD_DISABLE_RECORD_REVISIONS, \
CFG_BIBUPLOAD_CONFLICTING_REVISION_TICKET_QUEUE
from invenio.utils.json import json, CFG_JSON_AVAILABLE
from invenio.bibupload_config import CFG_BIBUPLOAD_CONTROLFIELD_TAGS, \
CFG_BIBUPLOAD_SPECIAL_TAGS, \
CFG_BIBUPLOAD_DELETE_CODE, \
CFG_BIBUPLOAD_DELETE_VALUE, \
CFG_BIBUPLOAD_OPT_MODES
from invenio.dbquery import run_sql, \
Error
from invenio.legacy.bibrecord import create_records, \
record_add_field, \
record_delete_field, \
record_xml_output, \
record_get_field_instances, \
record_get_field_value, \
record_get_field_values, \
field_get_subfield_values, \
field_get_subfield_instances, \
record_modify_subfield, \
record_delete_subfield_from, \
record_delete_fields, \
record_add_subfield_into, \
record_find_field, \
record_extract_oai_id, \
record_extract_dois, \
record_has_field,\
records_identical
from invenio.search_engine import get_record
from invenio.utils.date import convert_datestruct_to_datetext
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.bibcatalog import bibcatalog_system
from invenio.intbitset import intbitset
from invenio.utils.url import make_user_agent_string
from invenio.config import CFG_BIBDOCFILE_FILEDIR
from invenio.bibtask import task_init, write_message, \
task_set_option, task_get_option, task_get_task_param, task_update_status, \
task_update_progress, task_sleep_now_if_required, fix_argv_paths
from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, \
get_docname_from_url, check_valid_url, download_url, \
KEEP_OLD_VALUE, decompose_bibdocfile_url, InvenioBibDocFileError, \
bibdocfile_url_p, CFG_BIBDOCFILE_AVAILABLE_FLAGS, guess_format_from_url, \
BibRelation, MoreInfo
from invenio.search_engine import search_pattern
from invenio.bibupload_revisionverifier import RevisionVerifier, \
InvenioBibUploadConflictingRevisionsError, \
InvenioBibUploadInvalidRevisionError, \
InvenioBibUploadMissing005Error, \
InvenioBibUploadUnchangedRecordError
#Statistic variables
# Run-time counters updated while processing an upload; 'exectime' is
# initialised to module-load time.
stat = {}
stat['nb_records_to_upload'] = 0
stat['nb_records_updated'] = 0
stat['nb_records_inserted'] = 0
stat['nb_errors'] = 0
stat['nb_holdingpen'] = 0
stat['exectime'] = time.localtime()
# Cached result of the writing-rights check (None = not checked yet --
# presumably set elsewhere in this module; confirm against its users).
_WRITING_RIGHTS = None
CFG_BIBUPLOAD_ALLOWED_SPECIAL_TREATMENTS = ('oracle', )
# Tri-state cache for check_bibcatalog(): "UNKNOWN" until first probed,
# then True/False.
CFG_HAS_BIBCATALOG = "UNKNOWN"
def check_bibcatalog():
    """
    Return True if bibcatalog is available.

    The probe runs only once per process; subsequent calls return the
    cached result from CFG_HAS_BIBCATALOG.
    """
    global CFG_HAS_BIBCATALOG  # pylint: disable=W0603
    if CFG_HAS_BIBCATALOG == "UNKNOWN":
        if bibcatalog_system is None:
            response = "No ticket system configured"
        else:
            response = bibcatalog_system.check_system()
        # An empty response string means the ticket system is healthy.
        CFG_HAS_BIBCATALOG = (response == "")
        if not CFG_HAS_BIBCATALOG:
            write_message("BibCatalog error: %s\n" % (response,))
    return CFG_HAS_BIBCATALOG
## Let's set a reasonable timeout for URL request (e.g. FFT)
## NOTE: this is process-wide -- it affects every socket created without
## an explicit timeout, not only FFT downloads.
socket.setdefaulttimeout(40)
def parse_identifier(identifier):
    """Parse the identifier and determine if it is temporary or fixed.

    Returns (True, suffix-after-'TMP:') for temporary identifiers and
    (False, original-identifier) otherwise.
    """
    as_text = str(identifier)
    if as_text.startswith("TMP:"):
        # Temporary identifier: strip the "TMP:" prefix, keep it a string.
        return (True, as_text[len("TMP:"):])
    # Fixed identifier: hand the original value back untouched.
    return (False, identifier)
def resolve_identifier(tmps, identifier):
    """Resolves an identifier. If the identifier is not temporary, this
    function is an identity on the second argument. Otherwise, a resolved
    value is returned or an exception raised.

    A temporary identifier is one whose string form starts with 'TMP:';
    its suffix is looked up in the *tmps* dictionary.
    """
    # Inline parse of the TMP: convention (see parse_identifier).
    id_str = str(identifier)
    if not id_str.startswith("TMP:"):
        return int(identifier)
    tmp_id = id_str[4:]
    if not tmp_id in tmps:
        raise StandardError("Temporary identifier %s not present in the dictionary" % (tmp_id, ))
    if tmps[tmp_id] == -1:
        # the identifier has been signalised but never assigned a value - probably error during processing
        raise StandardError("Temporary identifier %s has been declared, but never assigned a value. Probably an error during processign of an appropriate FFT has happened. Please see the log" % (tmp_id, ))
    return int(tmps[tmp_id])
# Quick scan for <controlfield tag="001">NNN</controlfield>: findall()
# yields (quote-style, record-id) pairs, so callers read element [1].
_re_find_001 = re.compile('<controlfield\\s+tag=("001"|\'001\')\\s*>\\s*(\\d*)\\s*</controlfield>', re.S)
def bibupload_pending_recids():
    """This function embed a bit of A.I. and is more a hack than an elegant
    algorithm. It should be updated in case bibupload/bibsched are modified
    in incompatible ways.
    This function return the intbitset of all the records that are being
    (or are scheduled to be) touched by other bibuploads.

    Unreadable or malformed XML files simply contribute no record ids.
    """
    options = run_sql("""SELECT arguments FROM schTASK WHERE status<>'DONE' AND
        proc='bibupload' AND (status='RUNNING' OR status='CONTINUING' OR
        status='WAITING' OR status='SCHEDULED' OR status='ABOUT TO STOP' OR
        status='ABOUT TO SLEEP')""")
    ret = intbitset()
    xmls = []
    if options:
        for arguments in options:
            arguments = marshal.loads(arguments[0])
            for argument in arguments[1:]:
                if argument.startswith('/'):
                    # XMLs files are recognizable because they're absolute
                    # files...
                    xmls.append(argument)
    for xmlfile in xmls:
        # Let's grep for the 001
        try:
            xml_fd = open(xmlfile)
            try:
                xml = xml_fd.read()
            finally:
                # FIX: the handle used to leak when read() failed.
                xml_fd.close()
            ret += [int(group[1]) for group in _re_find_001.findall(xml)]
        except Exception:
            # Best effort: skip files we cannot read or parse, but no
            # longer swallow KeyboardInterrupt/SystemExit (old bare except).
            continue
    return ret
### bibupload engine functions:
def bibupload(record, opt_mode=None, opt_notimechange=0, oai_rec_id="", pretend=False,
tmp_ids=None, tmp_vers=None):
"""Main function: process a record and fit it in the tables
bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record
metadata.
Return (error_code, recID) of the processed record.
"""
if tmp_ids is None:
tmp_ids = {}
if tmp_vers is None:
tmp_vers = {}
if opt_mode == 'reference':
## NOTE: reference mode has been deprecated in favour of 'correct'
opt_mode = 'correct'
assert(opt_mode in CFG_BIBUPLOAD_OPT_MODES)
error = None
affected_tags = {}
original_record = {}
rec_old = {}
now = datetime.now() # will hold record creation/modification date
record_had_altered_bit = False
is_opt_mode_delete = False
# Extraction of the Record Id from 001, SYSNO or OAIID or DOI tags:
rec_id = retrieve_rec_id(record, opt_mode, pretend=pretend)
if rec_id == -1:
msg = " Failed: either the record already exists and insert was " \
"requested or the record does not exists and " \
"replace/correct/append has been used"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, -1, msg)
elif rec_id > 0:
write_message(" -Retrieve record ID (found %s): DONE." % rec_id, verbose=2)
(unique_p, msg) = check_record_doi_is_unique(rec_id, record)
if not unique_p:
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
if not record.has_key('001'):
# Found record ID by means of SYSNO or OAIID or DOI, and the
# input MARCXML buffer does not have this 001 tag, so we
# should add it now:
error = record_add_field(record, '001', controlfield_value=rec_id)
if error is None:
msg = " Failed: Error during adding the 001 controlfield " \
"to the record"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
else:
error = None
write_message(" -Added tag 001: DONE.", verbose=2)
write_message(" -Check if the xml marc file is already in the database: DONE" , verbose=2)
record_deleted_p = False
if opt_mode == 'insert' or \
(opt_mode == 'replace_or_insert') and rec_id is None:
insert_mode_p = True
# Insert the record into the bibrec databases to have a recordId
rec_id = create_new_record(pretend=pretend)
write_message(" -Creation of a new record id (%d): DONE" % rec_id, verbose=2)
# we add the record Id control field to the record
error = record_add_field(record, '001', controlfield_value=rec_id)
if error is None:
msg = " Failed: Error during adding the 001 controlfield " \
"to the record"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
else:
error = None
if '005' not in record:
error = record_add_field(record, '005', controlfield_value=now.strftime("%Y%m%d%H%M%S.0"))
if error is None:
msg = " Failed: Error during adding to 005 controlfield to record"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
else:
error = None
else:
write_message(" Note: 005 already existing upon inserting of new record. Keeping it.", verbose=2)
elif opt_mode != 'insert':
insert_mode_p = False
# Update Mode
# Retrieve the old record to update
rec_old = get_record(rec_id)
record_had_altered_bit = record_get_field_values(rec_old, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4], CFG_OAI_PROVENANCE_ALTERED_SUBFIELD)
# Also save a copy to restore previous situation in case of errors
original_record = get_record(rec_id)
if rec_old is None:
msg = " Failed during the creation of the old record!"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
else:
write_message(" -Retrieve the old record to update: DONE", verbose=2)
# flag to check whether the revisions have been verified and patch generated.
# If revision verification failed, then we need to manually identify the affected tags
# and process them
revision_verified = False
rev_verifier = RevisionVerifier()
#check for revision conflicts before updating record
if record_has_field(record, '005') and not CFG_BIBUPLOAD_DISABLE_RECORD_REVISIONS:
write_message(" -Upload Record has 005. Verifying Revision", verbose=2)
try:
rev_res = rev_verifier.verify_revision(record, original_record, opt_mode)
if rev_res:
opt_mode = rev_res[0]
record = rev_res[1]
affected_tags = rev_res[2]
revision_verified = True
write_message(lambda: " -Patch record generated. Changing opt_mode to correct.\nPatch:\n%s " % record_xml_output(record), verbose=2)
else:
write_message(" -No Patch Record.", verbose=2)
except InvenioBibUploadUnchangedRecordError, err:
msg = " -ISSUE: %s" % err
write_message(msg, verbose=1, stream=sys.stderr)
write_message(msg, " Continuing anyway in case there are FFT or other tags")
except InvenioBibUploadConflictingRevisionsError, err:
msg = " -ERROR: Conflicting Revisions - %s" % err
write_message(msg, verbose=1, stream=sys.stderr)
submit_ticket_for_holding_pen(rec_id, err, "Conflicting Revisions. Inserting record into holding pen.")
insert_record_into_holding_pen(record, str(rec_id))
return (2, int(rec_id), msg)
except InvenioBibUploadInvalidRevisionError, err:
msg = " -ERROR: Invalid Revision - %s" % err
write_message(msg)
submit_ticket_for_holding_pen(rec_id, err, "Invalid Revisions. Inserting record into holding pen.")
insert_record_into_holding_pen(record, str(rec_id))
return (2, int(rec_id), msg)
except InvenioBibUploadMissing005Error, err:
msg = " -ERROR: Missing 005 - %s" % err
write_message(msg)
submit_ticket_for_holding_pen(rec_id, err, "Missing 005. Inserting record into holding pen.")
insert_record_into_holding_pen(record, str(rec_id))
return (2, int(rec_id), msg)
else:
write_message(" - No 005 Tag Present. Resuming normal flow.", verbose=2)
# dictionaries to temporarily hold original recs tag-fields
existing_tags = {}
retained_tags = {}
# in case of delete operation affected tags should be deleted in delete_bibrec_bibxxx
# but should not be updated again in STAGE 4
# utilising the below flag
is_opt_mode_delete = False
if not revision_verified:
# either 005 was not present or opt_mode was not correct/replace
# in this case we still need to find out affected tags to process
write_message(" - Missing 005 or opt_mode!=Replace/Correct.Revision Verifier not called.", verbose=2)
# Identify affected tags
if opt_mode == 'correct' or opt_mode == 'replace' or opt_mode == 'replace_or_insert':
rec_diff = rev_verifier.compare_records(record, original_record, opt_mode)
affected_tags = rev_verifier.retrieve_affected_tags_with_ind(rec_diff)
elif opt_mode == 'delete':
# populate an intermediate dictionary
# used in upcoming step related to 'delete' mode
is_opt_mode_delete = True
for tag, fields in original_record.iteritems():
existing_tags[tag] = [tag + (field[1] != ' ' and field[1] or '_') + (field[2] != ' ' and field[2] or '_') for field in fields]
elif opt_mode == 'append':
for tag, fields in record.iteritems():
if tag not in CFG_BIBUPLOAD_CONTROLFIELD_TAGS:
affected_tags[tag]=[(field[1], field[2]) for field in fields]
# In Replace mode, take over old strong tags if applicable:
if opt_mode == 'replace' or \
opt_mode == 'replace_or_insert':
copy_strong_tags_from_old_record(record, rec_old)
# Delete tags to correct in the record
if opt_mode == 'correct':
delete_tags_to_correct(record, rec_old)
write_message(" -Delete the old tags to correct in the old record: DONE",
verbose=2)
# Delete tags specified if in delete mode
if opt_mode == 'delete':
record = delete_tags(record, rec_old)
for tag, fields in record.iteritems():
retained_tags[tag] = [tag + (field[1] != ' ' and field[1] or '_') + (field[2] != ' ' and field[2] or '_') for field in fields]
#identify the tags that have been deleted
for tag in existing_tags.keys():
if tag not in retained_tags:
for item in existing_tags[tag]:
tag_to_add = item[0:3]
ind1, ind2 = item[3], item[4]
if tag_to_add in affected_tags and (ind1, ind2) not in affected_tags[tag_to_add]:
affected_tags[tag_to_add].append((ind1, ind2))
else:
affected_tags[tag_to_add] = [(ind1, ind2)]
else:
deleted = list(set(existing_tags[tag]) - set(retained_tags[tag]))
for item in deleted:
tag_to_add = item[0:3]
ind1, ind2 = item[3], item[4]
if tag_to_add in affected_tags and (ind1, ind2) not in affected_tags[tag_to_add]:
affected_tags[tag_to_add].append((ind1, ind2))
else:
affected_tags[tag_to_add] = [(ind1, ind2)]
write_message(" -Delete specified tags in the old record: DONE", verbose=2)
# Append new tag to the old record and update the new record with the old_record modified
if opt_mode == 'append' or opt_mode == 'correct':
record = append_new_tag_to_old_record(record, rec_old)
write_message(" -Append new tags to the old record: DONE", verbose=2)
write_message(" -Affected Tags found after comparing upload and original records: %s"%(str(affected_tags)), verbose=2)
# 005 tag should be added everytime the record is modified
# If an exiting record is modified, its 005 tag should be overwritten with a new revision value
if record.has_key('005'):
record_delete_field(record, '005')
write_message(" Deleted the existing 005 tag.", verbose=2)
last_revision = run_sql("SELECT MAX(job_date) FROM hstRECORD WHERE id_bibrec=%s", (rec_id, ))[0][0]
if last_revision and last_revision.strftime("%Y%m%d%H%M%S.0") == now.strftime("%Y%m%d%H%M%S.0"):
## We are updating the same record within the same seconds! It's less than
## the minimal granularity. Let's pause for 1 more second to take a breath :-)
time.sleep(1)
now = datetime.now()
error = record_add_field(record, '005', controlfield_value=now.strftime("%Y%m%d%H%M%S.0"))
if error is None:
write_message(" Failed: Error during adding to 005 controlfield to record", verbose=1, stream=sys.stderr)
return (1, int(rec_id))
else:
error=None
write_message(lambda: " -Added tag 005: DONE. "+ str(record_get_field_value(record, '005', '', '')), verbose=2)
# adding 005 to affected tags will delete the existing 005 entry
# and update with the latest timestamp.
if '005' not in affected_tags:
affected_tags['005'] = [(' ', ' ')]
write_message(" -Stage COMPLETED", verbose=2)
record_deleted_p = False
try:
if not record_is_valid(record):
msg = "ERROR: record is not valid"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, -1, msg)
# Have a look if we have FFT tags
write_message("Stage 2: Start (Process FFT tags if exist).", verbose=2)
record_had_FFT = False
if extract_tag_from_record(record, 'FFT') is not None:
record_had_FFT = True
if not writing_rights_p():
write_message(" Stage 2 failed: Error no rights to write fulltext files",
verbose=1, stream=sys.stderr)
task_update_status("ERROR")
sys.exit(1)
try:
record = elaborate_fft_tags(record, rec_id, opt_mode,
pretend=pretend, tmp_ids=tmp_ids,
tmp_vers=tmp_vers)
except Exception, e:
register_exception()
msg = " Stage 2 failed: Error while elaborating FFT tags: %s" % e
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
if record is None:
msg = " Stage 2 failed: Error while elaborating FFT tags"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
write_message(" -Stage COMPLETED", verbose=2)
else:
write_message(" -Stage NOT NEEDED", verbose=2)
# Have a look if we have FFT tags
write_message("Stage 2B: Start (Synchronize 8564 tags).", verbose=2)
if record_had_FFT or extract_tag_from_record(record, '856') is not None:
try:
record = synchronize_8564(rec_id, record, record_had_FFT, pretend=pretend)
# in case if FFT is in affected list make appropriate changes
if opt_mode is not 'insert': # because for insert, all tags are affected
if ('4', ' ') not in affected_tags.get('856', []):
if '856' not in affected_tags:
affected_tags['856'] = [('4', ' ')]
elif ('4', ' ') not in affected_tags['856']:
affected_tags['856'].append(('4', ' '))
write_message(" -Modified field list updated with FFT details: %s" % str(affected_tags), verbose=2)
except Exception, e:
register_exception(alert_admin=True)
msg = " Stage 2B failed: Error while synchronizing 8564 tags: %s" % e
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
if record is None:
msg = " Stage 2B failed: Error while synchronizing 8564 tags"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
write_message(" -Stage COMPLETED", verbose=2)
else:
write_message(" -Stage NOT NEEDED", verbose=2)
write_message("Stage 3: Start (Apply fields deletion requests).", verbose=2)
write_message(lambda: " Record before deletion:\n%s" % record_xml_output(record), verbose=9)
# remove fields with __DELETE_FIELDS__
# NOTE:creating a temporary deep copy of record for iteration to avoid RunTimeError
# RuntimeError due to change in dictionary size during iteration
tmp_rec = copy.deepcopy(record)
for tag in tmp_rec:
for data_tuple in record[tag]:
if (CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE) in data_tuple[0]:
# delete the tag with particular indicator pairs from original record
record_delete_field(record, tag, data_tuple[1], data_tuple[2])
write_message(lambda: " Record after cleaning up fields to be deleted:\n%s" % record_xml_output(record), verbose=9)
# Update of the BibFmt
write_message("Stage 4: Start (Update bibfmt).", verbose=2)
updates_exist = not records_identical(record, original_record)
if updates_exist:
# if record_had_altered_bit, this must be set to true, since the
# record has been altered.
if record_had_altered_bit:
oai_provenance_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4])
for oai_provenance_field in oai_provenance_fields:
for i, (code, dummy_value) in enumerate(oai_provenance_field[0]):
if code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD:
oai_provenance_field[0][i] = (code, 'true')
tmp_indicators = (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4])
if tmp_indicators not in affected_tags.get(CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], []):
if CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3] not in affected_tags:
affected_tags[CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3]] = [tmp_indicators]
else:
affected_tags[CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3]].append(tmp_indicators)
write_message(lambda: " Updates exists:\n%s\n!=\n%s" % (record, original_record), verbose=9)
# format the single record as xml
rec_xml_new = record_xml_output(record)
# Update bibfmt with the format xm of this record
modification_date = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(record_get_field_value(record, '005'), '%Y%m%d%H%M%S.0'))
error = update_bibfmt_format(rec_id, rec_xml_new, 'xm', modification_date, pretend=pretend)
if error == 1:
msg = " Failed: error during update_bibfmt_format 'xm'"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE:
error = update_bibfmt_format(rec_id, marshal.dumps(record), 'recstruct', modification_date, pretend=pretend)
if error == 1:
msg = " Failed: error during update_bibfmt_format 'recstruct'"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
if not CFG_BIBUPLOAD_DISABLE_RECORD_REVISIONS:
# archive MARCXML format of this record for version history purposes:
error = archive_marcxml_for_history(rec_id, affected_fields=affected_tags, pretend=pretend)
if error == 1:
msg = " Failed to archive MARCXML for history"
write_message(msg, verbose=1, stream=sys.stderr)
return (1, int(rec_id), msg)
else:
write_message(" -Archived MARCXML for history: DONE", verbose=2)
# delete some formats like HB upon record change:
if updates_exist or record_had_FFT:
for format_to_delete in CFG_BIBUPLOAD_DELETE_FORMATS:
try:
delete_bibfmt_format(rec_id, format_to_delete, pretend=pretend)
except:
# OK, some formats like HB could not have been deleted, no big deal
pass
write_message(" -Stage COMPLETED", verbose=2)
## Let's assert that one and only one 005 tag is existing at this stage.
assert len(record['005']) == 1
# Update the database MetaData
write_message("Stage 5: Start (Update the database with the metadata).",
verbose=2)
if insert_mode_p:
update_database_with_metadata(record, rec_id, oai_rec_id, pretend=pretend)
elif opt_mode in ('replace', 'replace_or_insert',
'append', 'correct', 'delete') and updates_exist:
# now we clear all the rows from bibrec_bibxxx from the old
record_deleted_p = True
delete_bibrec_bibxxx(rec_old, rec_id, affected_tags, pretend=pretend)
# metadata update will insert tags that are available in affected_tags.
# but for delete, once the tags have been deleted from bibrec_bibxxx, they dont have to be inserted
# except for 005.
if is_opt_mode_delete:
tmp_affected_tags = copy.deepcopy(affected_tags)
for tag in tmp_affected_tags:
if tag != '005':
affected_tags.pop(tag)
write_message(" -Clean bibrec_bibxxx: DONE", verbose=2)
update_database_with_metadata(record, rec_id, oai_rec_id, affected_tags, pretend=pretend)
else:
write_message(" -Stage NOT NEEDED in mode %s" % opt_mode,
verbose=2)
write_message(" -Stage COMPLETED", verbose=2)
record_deleted_p = False
# Finally we update the bibrec table with the current date
write_message("Stage 6: Start (Update bibrec table with current date).",
verbose=2)
if opt_notimechange == 0 and (updates_exist or record_had_FFT):
bibrec_now = convert_datestruct_to_datetext(time.localtime())
write_message(" -Retrieved current localtime: DONE", verbose=2)
update_bibrec_date(bibrec_now, rec_id, insert_mode_p, pretend=pretend)
write_message(" -Stage COMPLETED", verbose=2)
else:
write_message(" -Stage NOT NEEDED", verbose=2)
# Increase statistics
if insert_mode_p:
stat['nb_records_inserted'] += 1
else:
stat['nb_records_updated'] += 1
# Upload of this record finish
write_message("Record "+str(rec_id)+" DONE", verbose=1)
return (0, int(rec_id), "")
finally:
if record_deleted_p:
## BibUpload has failed living the record deleted. We should
## back the original record then.
update_database_with_metadata(original_record, rec_id, oai_rec_id, pretend=pretend)
write_message(" Restored original record", verbose=1, stream=sys.stderr)
def record_is_valid(record):
    """
    Validate a record by checking that it carries exactly one rec_id.

    @param record: the record
    @type record: recstruct
    @return: True if the record is valid
    @rtype: bool
    """
    found_ids = record_get_field_values(record, tag="001")
    if len(found_ids) == 1:
        return True
    write_message(" The record is not valid: it has not a single rec_id: %s" % (found_ids), stream=sys.stderr)
    return False
def find_record_ids_by_oai_id(oaiId):
    """
    A method finding the records identifier provided the oai identifier
    returns a list of identifiers matching a given oai identifier
    """
    if not oaiId:
        return intbitset()
    # Is this record already in Invenio? Match by oaiid first.
    matches = search_pattern(p=oaiId, f=CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, m='e')
    # Then match by report number (typically 037), to avoid double
    # insertions of the same record.
    suffix = oaiId.split(":")[-1]
    if suffix:
        matches |= search_pattern(p=suffix,
                                  f="reportnumber",
                                  m='e')
    # Finally try the arXiv-prefixed form of the report number,
    # for the same reason.
    matches |= search_pattern(p="arXiv:" + oaiId.split(":")[-1],
                              f="reportnumber",
                              m='e')
    return matches
def bibupload_post_phase(record, mode=None, rec_id="", pretend=False,
tmp_ids=None, tmp_vers=None):
def _elaborate_tag(record, tag, fun):
if extract_tag_from_record(record, tag) is not None:
try:
record = fun()
except Exception, e:
register_exception()
write_message(" Stage failed: Error while elaborating %s tags: %s" % (tag, e),
verbose=1, stream=sys.stderr)
return (1, int(rec_id)) # TODO: ?
if record is None:
write_message(" Stage failed: Error while elaborating %s tags" % (tag, ),
verbose=1, stream=sys.stderr)
return (1, int(rec_id))
write_message(" -Stage COMPLETED", verbose=2)
else:
write_message(" -Stage NOT NEEDED", verbose=2)
if tmp_ids is None:
tmp_ids = {}
if tmp_vers is None:
tmp_vers = {}
_elaborate_tag(record, "BDR", lambda: elaborate_brt_tags(record, rec_id = rec_id,
mode = mode,
pretend = pretend,
tmp_ids = tmp_ids,
tmp_vers = tmp_vers))
_elaborate_tag(record, "BDM", lambda: elaborate_mit_tags(record, rec_id = rec_id,
mode = mode,
pretend = pretend,
tmp_ids = tmp_ids,
tmp_vers = tmp_vers))
def submit_ticket_for_holding_pen(rec_id, err, msg):
"""
Submit a ticket via BibCatalog to report about a record that has been put
into the Holding Pen.
@rec_id: the affected record
@err: the corresponding Exception
msg: verbose message
"""
from invenio import bibtask
from invenio.webuser import get_email_from_username, get_uid_from_email
user = task_get_task_param("user")
uid = None
if user:
try:
uid = get_uid_from_email(get_email_from_username(user))
except Exception, err:
write_message("WARNING: can't reliably retrieve uid for user %s: %s" % (user, err), stream=sys.stderr)
if check_bibcatalog():
text = """
%(msg)s found for record %(rec_id)s: %(err)s
See: <%(siteurl)s/record/edit/#state=edit&recid=%(rec_id)s>
BibUpload task information:
task_id: %(task_id)s
task_specific_name: %(task_specific_name)s
user: %(user)s
task_params: %(task_params)s
task_options: %(task_options)s""" % {
"msg": msg,
"rec_id": rec_id,
"err": err,
"siteurl": CFG_SITE_SECURE_URL,
"task_id": task_get_task_param("task_id"),
"task_specific_name": task_get_task_param("task_specific_name"),
"user": user,
"task_params": bibtask._TASK_PARAMS,
"task_options": bibtask._OPTIONS}
bibcatalog_system.ticket_submit(subject="%s: %s by %s" % (msg, rec_id, user), recordid=rec_id, text=text, queue=CFG_BIBUPLOAD_CONFLICTING_REVISION_TICKET_QUEUE, owner=uid)
def insert_record_into_holding_pen(record, oai_id, pretend=False):
    """Store the given record in the holding pen table and log the event."""
    marcxml = record_xml_output(record)
    # Determine the identifier of the record: first via its OAI id...
    candidate_ids = find_record_ids_by_oai_id(oai_id)
    if len(candidate_ids) > 0:
        bibrec_id = candidate_ids.pop()
    else:
        # ...otherwise fall back on a wider search based on any
        # information we might have; default to 0 when nothing matches.
        bibrec_id = retrieve_rec_id(record, 'holdingpen', pretend=pretend)
        if bibrec_id is None:
            bibrec_id = 0
    if not pretend:
        run_sql("INSERT INTO bibHOLDINGPEN (oai_id, changeset_date, changeset_xml, id_bibrec) VALUES (%s, NOW(), %s, %s)",
                (oai_id, marcxml, bibrec_id))
    # record_id is logged as 0! ( We are not inserting into the main database)
    log_record_uploading(oai_id, task_get_task_param('task_id', 0), 0, 'H', pretend=pretend)
    stat['nb_holdingpen'] += 1
def print_out_bibupload_statistics():
    """Print the statistics of the process"""
    elapsed = time.time() - time.mktime(stat['exectime'])
    summary = ("Task stats: %(nb_input)d input records, %(nb_updated)d updated, "
               "%(nb_inserted)d inserted, %(nb_errors)d errors, %(nb_holdingpen)d inserted to holding pen. "
               "Time %(nb_sec).2f sec." % {
                   'nb_input': stat['nb_records_to_upload'],
                   'nb_updated': stat['nb_records_updated'],
                   'nb_inserted': stat['nb_records_inserted'],
                   'nb_errors': stat['nb_errors'],
                   'nb_holdingpen': stat['nb_holdingpen'],
                   'nb_sec': elapsed})
    write_message(summary)
def open_marc_file(path):
"""Open a file and return the data"""
try:
# open the file containing the marc document
marc_file = open(path, 'r')
marc = marc_file.read()
marc_file.close()
except IOError, erro:
write_message("Error: %s" % erro, verbose=1, stream=sys.stderr)
write_message("Exiting.", sys.stderr)
if erro.errno == 2:
# No such file or directory
# Not scary
task_update_status("CERROR")
else:
task_update_status("ERROR")
sys.exit(1)
return marc
def xml_marc_to_records(xml_marc):
    """
    Create the list of records out of a MARCXML string.

    Exits the task (status ERROR/CERROR) when the input cannot be parsed
    or has the wrong format.
    """
    # Creation of the records from the xml Marc in argument
    recs = create_records(xml_marc, 1, 1)
    if recs == []:
        write_message("Error: Cannot parse MARCXML file.", verbose=1, stream=sys.stderr)
        # BUGFIX: sys.stderr used to be passed positionally (as the
        # ``verbose`` argument); it is the output stream.
        write_message("Exiting.", stream=sys.stderr)
        task_update_status("ERROR")
        sys.exit(1)
    elif recs[0][0] is None:
        write_message("Error: MARCXML file has wrong format: %s" % recs,
                      verbose=1, stream=sys.stderr)
        write_message("Exiting.", stream=sys.stderr)
        task_update_status("CERROR")
        sys.exit(1)
    else:
        # keep only the record structure of each (record, status, ...) tuple
        return [rec[0] for rec in recs]
def find_record_format(rec_id, bibformat):
    """Look whether record REC_ID is formatted in FORMAT,
    i.e. whether FORMAT exists in the bibfmt table for this record.

    Return the number of times it is formatted: 0 if not, 1 if yes,
    2 if found more than once (should never occur).
    """
    rows = run_sql("""SELECT COUNT(*) FROM bibfmt WHERE id_bibrec=%s AND format=%s""",
                   (rec_id, bibformat))
    return rows[0][0]
def find_record_from_recid(rec_id):
    """
    Try to find record in the database from the REC_ID number.
    Return record ID if found, None otherwise.
    """
    rows = run_sql("SELECT id FROM bibrec WHERE id=%s",
                   (rec_id,))
    return rows[0][0] if rows else None
def find_record_from_sysno(sysno):
    """
    Try to find record in the database from the external SYSNO number.
    Return record ID if found, None otherwise.
    """
    # bibxxx/bibrec_bibxxx table names derive from the configured tag.
    bibxxx = 'bib' + CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2] + 'x'
    tables = {'bibxxx': bibxxx,
              'bibrec_bibxxx': 'bibrec_' + bibxxx}
    rows = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
        %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
        AND bb.id_bibxxx=b.id""" % tables,
                   (CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, sysno,))
    if rows:
        return rows[0][0]
    return None
def find_records_from_extoaiid(extoaiid, extoaisrc=None):
    """
    Try to find records in the database from the external EXTOAIID number.

    @param extoaiid: external OAI id value to look for
    @param extoaisrc: expected provenance (source) value, or None
    @return: intbitset of record ids whose EXTOAIID and provenance both match
    """
    # The id tag and the provenance tag must address the same field
    # (tag + indicators), since provenance is read from the same field
    # instances below.
    assert(CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5] == CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[:5])
    bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:2]+'x'
    bibrec_bibxxx = 'bibrec_' + bibxxx
    write_message(' Looking for extoaiid="%s" with extoaisrc="%s"' % (extoaiid, extoaisrc), verbose=9)
    # Candidate records: any record carrying the extoaiid value in the
    # configured tag (provenance not yet checked at this point).
    id_bibrecs = intbitset(run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
        %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
        AND bb.id_bibxxx=b.id""" % \
        {'bibxxx': bibxxx,
         'bibrec_bibxxx': bibrec_bibxxx},
        (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, extoaiid,)))
    write_message(' Partially found %s for extoaiid="%s"' % (id_bibrecs, extoaiid), verbose=9)
    ret = intbitset()
    for id_bibrec in id_bibrecs:
        record = get_record(id_bibrec)
        instances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4])
        write_message(' recid %s -> instances "%s"' % (id_bibrec, instances), verbose=9)
        for instance in instances:
            # Extract this instance's provenance and id subfield values;
            # empty lists are normalized to None.
            this_extoaisrc = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5])
            this_extoaisrc = this_extoaisrc and this_extoaisrc[0] or None
            this_extoaiid = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5])
            this_extoaiid = this_extoaiid and this_extoaiid[0] or None
            write_message("        this_extoaisrc -> %s, this_extoaiid -> %s" % (this_extoaisrc, this_extoaiid), verbose=9)
            if this_extoaiid == extoaiid:
                write_message(' recid %s -> provenance "%s"' % (id_bibrec, this_extoaisrc), verbose=9)
                if this_extoaisrc == extoaisrc:
                    # Full match (id + provenance): accept the record and
                    # stop scanning its remaining field instances.
                    write_message('Found recid %s for extoaiid="%s" with provenance="%s"' % (id_bibrec, extoaiid, extoaisrc), verbose=9)
                    ret.add(id_bibrec)
                    break
                # Only reached when the id matched but the provenance did
                # not: warn about the asymmetric provenance information.
                if this_extoaisrc is None:
                    write_message('WARNING: Found recid %s for extoaiid="%s" that doesn\'t specify any provenance, while input record does.' % (id_bibrec, extoaiid), stream=sys.stderr)
                if extoaisrc is None:
                    write_message('WARNING: Found recid %s for extoaiid="%s" that specify a provenance (%s), while input record does not have a provenance.' % (id_bibrec, extoaiid, this_extoaisrc), stream=sys.stderr)
    return ret
def find_record_from_oaiid(oaiid):
    """
    Try to find record in the database from the OAI ID number and OAI SRC.
    Return record ID if found, None otherwise.
    """
    # bibxxx/bibrec_bibxxx table names derive from the configured OAI field.
    bibxxx = 'bib' + CFG_OAI_ID_FIELD[0:2] + 'x'
    tables = {'bibxxx': bibxxx,
              'bibrec_bibxxx': 'bibrec_' + bibxxx}
    rows = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb,
        %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s
        AND bb.id_bibxxx=b.id""" % tables,
                   (CFG_OAI_ID_FIELD, oaiid,))
    if rows:
        return rows[0][0]
    return None
def find_record_from_doi(doi):
    """
    Try to find record in the database from the given DOI.
    Return record ID if found, None otherwise.
    """
    bibxxx = 'bib02x'
    bibrec_bibxxx = 'bibrec_' + bibxxx
    tables = {'bibxxx': bibxxx,
              'bibrec_bibxxx': bibrec_bibxxx}
    candidates = run_sql("""SELECT bb.id_bibrec, bb.field_number
        FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b
        WHERE b.tag=%%s AND b.value=%%s
        AND bb.id_bibxxx=b.id""" % tables,
                         ('0247_a', doi,))
    # For each of the result, make sure that it is really tagged as doi
    # (i.e. the same field instance also has $2 == "doi").
    for (id_bibrec, field_number) in candidates:
        confirmation = run_sql("""SELECT bb.id_bibrec
            FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b
            WHERE b.tag=%%s AND b.value=%%s
            AND bb.id_bibxxx=b.id and bb.field_number=%%s and bb.id_bibrec=%%s""" % tables,
                               ('0247_2', "doi", field_number, id_bibrec))
        if confirmation and confirmation[0][0] == id_bibrec:
            return confirmation[0][0]
    return None
def extract_tag_from_record(record, tag_number):
    """ Extract the tag_number for record."""
    # An empty or missing record cannot contain any tag.
    if not record:
        return None
    return record.get(tag_number, None)
def retrieve_rec_id(record, opt_mode, pretend=False, post_phase = False):
    """Retrieve the record Id from a record by using tag 001 or SYSNO or OAI ID or DOI
    tag. opt_mod is the desired mode.

    @param record: the record to inspect
    @param opt_mode: the requested upload mode (insert, replace, correct, ...)
    @param pretend: when True no record is actually allocated on --force
    @param post_phase: Tells if we are calling this method in the postprocessing phase. If true, we accept presence of 001 fields even in the insert mode
    @type post_phase: boolean
    @return: the record id as int, None when no match was found, -1 on error
    """
    rec_id = None
    # 1st step: we look for the tag 001
    tag_001 = extract_tag_from_record(record, '001')
    if tag_001 is not None:
        # We extract the record ID from the tag
        rec_id = tag_001[0][3]
        # if we are in insert mode => error
        if opt_mode == 'insert' and not post_phase:
            write_message("   Failed: tag 001 found in the xml" \
                          " submitted, you should use the option replace," \
                          " correct or append to replace an existing" \
                          " record. (-h for help)",
                          verbose=1, stream=sys.stderr)
            return -1
        else:
            # we found the rec id and we are not in insert mode => continue
            # we try to match rec_id against the database:
            if find_record_from_recid(rec_id) is not None:
                # okay, 001 corresponds to some known record
                return int(rec_id)
            elif opt_mode in ('replace', 'replace_or_insert'):
                if task_get_option('force'):
                    # we found the rec_id but it's not in the system and we are
                    # requested to replace records. Therefore we create on the fly
                    # a empty record allocating the recid.
                    write_message("   Warning: tag 001 found in the xml with"
                                  " value %(rec_id)s, but rec_id %(rec_id)s does"
                                  " not exist. Since the mode replace was"
                                  " requested the rec_id %(rec_id)s is allocated"
                                  " on-the-fly." % {"rec_id": rec_id},
                                  stream=sys.stderr)
                    return create_new_record(rec_id=rec_id, pretend=pretend)
                else:
                    # Since --force was not used we are going to raise an error
                    write_message("   Failed: tag 001 found in the xml"
                                  " submitted with value %(rec_id)s. The"
                                  " corresponding record however does not"
                                  " exists. If you want to really create"
                                  " such record, please use the --force"
                                  " parameter when calling bibupload." % {
                                      "rec_id": rec_id}, stream=sys.stderr)
                    return -1
            else:
                # The record doesn't exist yet. We shall have try to check
                # the SYSNO or OAI or DOI id later.
                write_message("   -Tag 001 value not found in database.",
                              verbose=9)
                rec_id = None
    else:
        write_message("   -Tag 001 not found in the xml marc file.", verbose=9)

    if rec_id is None:
        # 2nd step: we look for the SYSNO
        sysnos = record_get_field_values(record,
                                         CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3],
                                         CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \
                                         CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or "",
                                         CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \
                                         CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or "",
                                         CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6])
        if sysnos:
            sysno = sysnos[0] # there should be only one external SYSNO
            write_message("   -Checking if SYSNO " + sysno + \
                          " exists in the database", verbose=9)
            # try to find the corresponding rec id from the database
            rec_id = find_record_from_sysno(sysno)
            if rec_id is None:
                # The record doesn't exist yet. We will try to check
                # external and internal OAI ids later.
                write_message("   -Tag SYSNO value not found in database.",
                              verbose=9)
        else:
            write_message("   -Tag SYSNO not found in the xml marc file.",
                          verbose=9)

    if rec_id is None:
        # 3rd step: we look for the external OAIID
        extoai_fields = record_get_field_instances(record,
                                                   CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3],
                                                   CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \
                                                   CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or "",
                                                   CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \
                                                   CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or "")
        if extoai_fields:
            for field in extoai_fields:
                extoaiid = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6])
                extoaisrc = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6])
                if extoaiid:
                    extoaiid = extoaiid[0]
                    if extoaisrc:
                        extoaisrc = extoaisrc[0]
                    else:
                        extoaisrc = None
                    write_message("   -Checking if EXTOAIID %s (%s) exists in the database" % (extoaiid, extoaisrc), verbose=9)
                    # try to find the corresponding rec id from the database
                    rec_ids = find_records_from_extoaiid(extoaiid, extoaisrc)
                    if rec_ids:
                        # rec_id found
                        rec_id = rec_ids.pop()
                        break
                    else:
                        # The record doesn't exist yet. We will try to check
                        # OAI id later.
                        write_message("   -Tag EXTOAIID value not found in database.",
                                      verbose=9)
        else:
            write_message("   -Tag EXTOAIID not found in the xml marc file.", verbose=9)

    if rec_id is None:
        # 4th step: we look for the OAI ID
        oaiidvalues = record_get_field_values(record,
                                              CFG_OAI_ID_FIELD[0:3],
                                              CFG_OAI_ID_FIELD[3:4] != "_" and \
                                              CFG_OAI_ID_FIELD[3:4] or "",
                                              CFG_OAI_ID_FIELD[4:5] != "_" and \
                                              CFG_OAI_ID_FIELD[4:5] or "",
                                              CFG_OAI_ID_FIELD[5:6])
        if oaiidvalues:
            oaiid = oaiidvalues[0] # there should be only one OAI ID
            write_message("   -Check if local OAI ID " + oaiid + \
                          " exist in the database", verbose=9)
            # try to find the corresponding rec id from the database
            rec_id = find_record_from_oaiid(oaiid)
            if rec_id is None:
                write_message("   -Tag OAI ID value not found in database.",
                              verbose=9)
        else:
            # BUGFIX: this message previously said "Tag SYSNO" (copy-paste
            # from the SYSNO step above), misreporting which tag is absent.
            write_message("   -Tag OAI ID not found in the xml marc file.",
                          verbose=9)

    if rec_id is None:
        # 5th step: we look for the DOI.
        record_dois = record_extract_dois(record)
        matching_recids = set()
        if record_dois:
            # try to find the corresponding rec id from the database
            for record_doi in record_dois:
                possible_recid = find_record_from_doi(record_doi)
                if possible_recid:
                    matching_recids.add(possible_recid)
            if len(matching_recids) > 1:
                # Oops, this record refers to DOI existing in multiple records.
                # Dunno which one to choose.
                write_message("   Failed: Multiple records found in the" \
                              " database %s that match the DOI(s) in the input" \
                              " MARCXML %s" % (repr(matching_recids), repr(record_dois)),
                              verbose=1, stream=sys.stderr)
                return -1
            elif len(matching_recids) == 1:
                rec_id = matching_recids.pop()
                if opt_mode == 'insert':
                    write_message("   Failed: DOI tag matching record #%s found in the xml" \
                                  " submitted, you should use the option replace," \
                                  " correct or append to replace an existing" \
                                  " record. (-h for help)" % rec_id,
                                  verbose=1, stream=sys.stderr)
                    return -1
            else:
                write_message("   - Tag DOI value not found in database.",
                              verbose=9)
        else:
            write_message("   -Tag DOI not found in the xml marc file.",
                          verbose=9)

    # Now we should have detected rec_id from SYSNO or OAIID
    # tags.  (None otherwise.)
    if rec_id:
        if opt_mode == 'insert':
            write_message("   Failed: Record found in the database," \
                          " you should use the option replace," \
                          " correct or append to replace an existing" \
                          " record. (-h for help)",
                          verbose=1, stream=sys.stderr)
            return -1
    else:
        if opt_mode != 'insert' and \
           opt_mode != 'replace_or_insert':
            write_message("   Failed: Record not found in the database."\
                          " Please insert the file before updating it."\
                          " (-h for help)", verbose=1, stream=sys.stderr)
            return -1
    return rec_id and int(rec_id) or None
def check_record_doi_is_unique(rec_id, record):
    """
    Check that DOI found in 'record' does not exist in any other
    record than 'recid'.

    @param rec_id: the identifier of the record the DOIs should belong to
    @param record: the record structure (as built by bibrecord) to inspect
    @return: (boolean, msg) where 'boolean' would be True if the DOI is
    unique (msg is then empty); otherwise False with an explanatory msg.
    """
    record_dois = record_extract_dois(record)
    if record_dois:
        # Collect every existing recid already carrying one of the input DOIs.
        matching_recids = set()
        for record_doi in record_dois:
            possible_recid = find_record_from_doi(record_doi)
            if possible_recid:
                matching_recids.add(possible_recid)
        if len(matching_recids) > 1:
            # Oops, this record refers to DOI existing in multiple records.
            msg = " Failed: Multiple records found in the" \
                  " database %s that match the DOI(s) in the input" \
                  " MARCXML %s" % (repr(matching_recids), repr(record_dois))
            return (False, msg)
        elif len(matching_recids) == 1:
            matching_recid = matching_recids.pop()
            if str(matching_recid) != str(rec_id):
                # Oops, this record refers to DOI existing in a different record.
                # FIX: error message used to read "in another other record".
                msg = " Failed: DOI(s) %s found in this record (#%s)" \
                      " already exist(s) in another record (#%s)" % \
                      (repr(record_dois), rec_id, matching_recid)
                return (False, msg)
    return (True, "")
### Insert functions
def create_new_record(rec_id=None, pretend=False):
    """
    Create new record in the database

    @param rec_id: if specified the new record will have this rec_id.
    @type rec_id: int
    @return: the allocated rec_id
    @rtype: int
    @note: in case of errors will be returned None
    """
    if rec_id is not None:
        try:
            rec_id = int(rec_id)
        except (ValueError, TypeError) as error:
            # FIX: the messages below used to refer to a non-existent
            # "creation_new_record" function; report the real name.
            write_message(" Error during the create_new_record function: %s "
                          % error, verbose=1, stream=sys.stderr)
            return None
        if run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id, )):
            # FIX: route this error to stderr, consistently with the one above
            # (it previously went to the default stream).
            write_message(" Error during the create_new_record function: the requested rec_id %s already exists." % rec_id,
                          verbose=1, stream=sys.stderr)
            return None
    if pretend:
        if rec_id:
            return rec_id
        else:
            # Simulate the id that AUTO_INCREMENT would hand out next.
            return run_sql("SELECT max(id)+1 FROM bibrec")[0][0]
    if rec_id is not None:
        # NOTE(review): run_sql() is assumed to return a usable row id even
        # when the primary key is supplied explicitly (MySQL does not update
        # LAST_INSERT_ID() in that case) -- TODO confirm against run_sql().
        return run_sql("INSERT INTO bibrec (id, creation_date, modification_date) VALUES (%s, NOW(), NOW())", (rec_id, ))
    else:
        return run_sql("INSERT INTO bibrec (creation_date, modification_date) VALUES (NOW(), NOW())")
def insert_bibfmt(id_bibrec, marc, bibformat, modification_date='1970-01-01 00:00:00', pretend=False):
    """Store the given MARC serialization for a record in the bibfmt table."""
    # The serialization is kept compressed in the database.
    compressed_marc = compress(marc)
    # Fall back to the epoch whenever the supplied timestamp is malformed.
    try:
        time.strptime(modification_date, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        modification_date = '1970-01-01 00:00:00'
    query = """INSERT LOW_PRIORITY INTO bibfmt (id_bibrec, format, last_updated, value)
        VALUES (%s, %s, %s, %s)"""
    if pretend:
        return 1
    return run_sql(query, (id_bibrec, bibformat, modification_date, compressed_marc))
def insert_record_bibxxx(tag, value, pretend=False):
    """Insert the record into bibxxx.

    Reuse an existing (tag, value) row when a binary-equal one is found,
    otherwise insert a new one.

    @param tag: full MARC tag; its first two digits select the bibXXx table.
    @param value: the subfield value to store.
    @param pretend: when True, perform the lookup but never insert.
    @return: a tuple (table_name, row_id) locating the stored value.
    """
    # determine into which table one should insert the record
    table_name = 'bib'+tag[0:2]+'x'
    # check if the tag, value combination exists in the table
    query = """SELECT id,value FROM %s """ % table_name
    query += """ WHERE tag=%s AND value=%s"""
    params = (tag, value)
    # FIX: dropped a redundant "res = None" that was immediately overwritten.
    res = run_sql(query, params)
    # Note: compare now the found values one by one and look for
    # string binary equality (e.g. to respect lowercase/uppercase
    # match), regardless of the charset etc settings. Ideally we
    # could use a BINARY operator in the above SELECT statement, but
    # we would have to check compatibility on various MySQLdb versions
    # etc; this approach checks all matched values in Python, not in
    # MySQL, which is less cool, but more conservative, so it should
    # work better on most setups.
    if res:
        for row in res:
            row_id = row[0]
            row_value = row[1]
            if row_value == value:
                return (table_name, row_id)
    # We got here only when the tag, value combination was not found,
    # so it is now necessary to insert the tag, value combination into
    # bibxxx table as new.
    query = """INSERT INTO %s """ % table_name
    query += """ (tag, value) values (%s , %s)"""
    params = (tag, value)
    if not pretend:
        row_id = run_sql(query, params)
    else:
        return (table_name, 1)
    return (table_name, row_id)
def insert_record_bibrec_bibxxx(table_name, id_bibxxx,
                                field_number, id_bibrec, pretend=False):
    """Link a bibxxx row to a record through the bibrec_bibXXx bridge table."""
    # The bridge table name mirrors the bibxxx table it points to.
    full_table_name = 'bibrec_'+ table_name
    if pretend:
        return 1
    # insert the proper row into the table
    query = """INSERT INTO %s """ % full_table_name
    query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)"""
    return run_sql(query, (id_bibrec, id_bibxxx, field_number))
def synchronize_8564(rec_id, record, record_had_FFT, pretend=False):
    """
    Synchronize 8564_ tags and BibDocFile tables.

    This function directly manipulate the record parameter.

    @type rec_id: positive integer
    @param rec_id: the record identifier.
    @param record: the record structure as created by bibrecord.create_record
    @type record_had_FFT: boolean
    @param record_had_FFT: True if the incoming bibuploaded-record used FFT
    @return: the manipulated record (which is also modified as a side effect)
    """
    def merge_marc_into_bibdocfile(field, pretend=False):
        """
        Internal function that reads a single field and stores its content
        in BibDocFile tables.
        @param field: the 8564_ field containing a BibDocFile URL.
        """
        write_message('Merging field: %s' % (field, ), verbose=9)
        # $u is the primary URL subfield; fall back to $q when $u is absent.
        url = field_get_subfield_values(field, 'u')[:1] or field_get_subfield_values(field, 'q')[:1]
        description = field_get_subfield_values(field, 'y')[:1]
        comment = field_get_subfield_values(field, 'z')[:1]
        if url:
            recid, docname, docformat = decompose_bibdocfile_url(url[0])
            if recid != rec_id:
                # The URL belongs to some other record: nothing to merge here.
                write_message("INFO: URL %s is not pointing to a fulltext owned by this record (%s)" % (url, recid), stream=sys.stderr)
            else:
                try:
                    bibdoc = BibRecDocs(recid).get_bibdoc(docname)
                    # Only push description/comment into the BibDoc tables when
                    # they are actually provided and we are not pretending.
                    if description and not pretend:
                        bibdoc.set_description(description[0], docformat)
                    if comment and not pretend:
                        bibdoc.set_comment(comment[0], docformat)
                except InvenioBibDocFileError:
                    ## Apparently the referenced docname doesn't exist anymore.
                    ## Too bad. Let's skip it.
                    write_message("WARNING: docname %s does not seem to exist for record %s. Has it been renamed outside FFT?" % (docname, recid), stream=sys.stderr)
    def merge_bibdocfile_into_marc(field, subfields):
        """
        Internal function that reads BibDocFile table entries referenced by
        the URL in the given 8564_ field and integrate the given information
        directly with the provided subfields.

        @param field: the 8564_ field containing a BibDocFile URL.
        @param subfields: the subfields corresponding to the BibDocFile URL
                          generated after BibDocFile tables.
        """
        write_message('Merging subfields %s into field %s' % (subfields, field), verbose=9)
        subfields = dict(subfields) ## We make a copy not to have side-effects
        subfield_to_delete = []
        for subfield_position, (code, value) in enumerate(field_get_subfield_instances(field)):
            ## For each subfield instance already existing...
            if code in subfields:
                ## ...We substitute it with what is in BibDocFile tables
                record_modify_subfield(record, '856', code, subfields[code],
                    subfield_position, field_position_global=field[4])
                del subfields[code]
            else:
                ## ...We delete it otherwise
                subfield_to_delete.append(subfield_position)
        subfield_to_delete.sort()
        for counter, position in enumerate(subfield_to_delete):
            ## FIXME: Very hackish algorithm. Since deleting a subfield
            ## will alterate the position of following subfields, we
            ## are taking note of this and adjusting further position
            ## by using a counter.
            record_delete_subfield_from(record, '856', position - counter,
                field_position_global=field[4])
        # Whatever survived in the copy was not present in the MARC field yet.
        subfields = subfields.items()
        subfields.sort()
        for code, value in subfields:
            ## Let's add non-previously existing subfields
            record_add_subfield_into(record, '856', code, value,
                field_position_global=field[4])
    def get_bibdocfile_managed_info():
        """
        Internal function, returns a dictionary of
        BibDocFile URL -> wanna-be subfields.
        This information is retrieved from internal BibDoc
        structures rather than from input MARC XML files

        @rtype: mapping
        @return: BibDocFile URL -> wanna-be subfields dictionary
        """
        ret = {}
        bibrecdocs = BibRecDocs(rec_id)
        # Hidden files must not be advertised through 8564_ tags.
        latest_files = bibrecdocs.list_latest_files(list_hidden=False)
        for afile in latest_files:
            url = afile.get_url()
            ret[url] = {'u': url}
            description = afile.get_description()
            comment = afile.get_comment()
            subformat = afile.get_subformat()
            if description:
                ret[url]['y'] = description
            if comment:
                ret[url]['z'] = comment
            if subformat:
                ret[url]['x'] = subformat
        return ret
    write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record), verbose=9)
    tags856s = record_get_field_instances(record, '856', '%', '%')
    write_message("Original 856%% instances: %s" % tags856s, verbose=9)
    tags8564s_to_add = get_bibdocfile_managed_info()
    write_message("BibDocFile instances: %s" % tags8564s_to_add, verbose=9)
    positions_tags8564s_to_remove = []
    for local_position, field in enumerate(tags856s):
        if field[1] == '4' and field[2] == ' ':
            # Only 8564_ instances (ind1='4', ind2=' ') describe fulltext URLs.
            write_message('Analysing %s' % (field, ), verbose=9)
            for url in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'):
                if url in tags8564s_to_add:
                    # there exists a link in the MARC of the record and the connection exists in BibDoc tables
                    if record_had_FFT:
                        merge_bibdocfile_into_marc(field, tags8564s_to_add[url])
                    else:
                        merge_marc_into_bibdocfile(field, pretend=pretend)
                    del tags8564s_to_add[url]
                    break
                elif bibdocfile_url_p(url) and decompose_bibdocfile_url(url)[0] == rec_id:
                    # The link exists and is potentially correct-looking link to a document
                    # moreover, it refers to current record id ... but it does not exist in
                    # internal BibDoc structures. This could have happen in the case of renaming a document
                    # or its removal. In both cases we have to remove link... a new one will be created
                    positions_tags8564s_to_remove.append(local_position)
                    write_message("%s to be deleted and re-synchronized" % (field, ), verbose=9)
                    break
    record_delete_fields(record, '856', positions_tags8564s_to_remove)
    # Anything still pending in tags8564s_to_add had no counterpart in the
    # MARC: create a brand new 8564_ field per managed file (sorted so the
    # output ordering is deterministic).
    tags8564s_to_add = tags8564s_to_add.values()
    tags8564s_to_add.sort()
    for subfields in tags8564s_to_add:
        subfields = subfields.items()
        subfields.sort()
        record_add_field(record, '856', '4', ' ', subfields=subfields)
    write_message('Final record: %s' % record, verbose=9)
    return record
def _get_subfield_value(field, subfield_code, default=None):
    """Return the first value of the given subfield, or `default` when absent."""
    values = field_get_subfield_values(field, subfield_code)
    if not values:
        return default
    return values[0]
def elaborate_mit_tags(record, rec_id, mode, pretend=False, tmp_ids=None,
                       tmp_vers=None):
    """
    Uploading MoreInfo -> BDM tags

    Process the BDM fields of 'record', updating the MoreInfo data
    attached to the referenced documents.

    @param record: the record structure being uploaded
    @param rec_id: identifier of the record the documents belong to
    @param mode: the bibupload mode (e.g. 'replace', 'correct', ...)
    @param pretend: when True, do not modify the database
    @param tmp_ids: mapping of temporary document identifiers to real ones
    @param tmp_vers: mapping of temporary version identifiers to real ones
    @return: the (possibly unchanged) record
    """
    # FIX: tmp_ids/tmp_vers used to be mutable default arguments, silently
    # shared between successive calls; allocate fresh dictionaries instead.
    if tmp_ids is None:
        tmp_ids = {}
    if tmp_vers is None:
        tmp_vers = {}
    tuple_list = extract_tag_from_record(record, 'BDM')
    # Now gathering information from BDM tags - to be processed later
    write_message("Processing BDM entries of the record ")
    recordDocs = BibRecDocs(rec_id)
    if tuple_list:
        for mit in record_get_field_instances(record, 'BDM', ' ', ' '):
            relation_id = _get_subfield_value(mit, "r")
            bibdoc_id = _get_subfield_value(mit, "i")
            # checking for a possibly temporary ID
            if not (bibdoc_id is None):
                bibdoc_id = resolve_identifier(tmp_ids, bibdoc_id)
            bibdoc_ver = _get_subfield_value(mit, "v")
            if not (bibdoc_ver is None):
                bibdoc_ver = resolve_identifier(tmp_vers, bibdoc_ver)
            bibdoc_name = _get_subfield_value(mit, "n")
            bibdoc_fmt = _get_subfield_value(mit, "f")
            moreinfo_str = _get_subfield_value(mit, "m")
            if bibdoc_id is None:
                if bibdoc_name is None:
                    # FIX: typo "obejct" -> "object" in the error message.
                    raise StandardError("Incorrect relation. Neither name nor identifier of the first object has been specified")
                else:
                    # retrieving the ID based on the document name (inside current record)
                    # The document is attached to current record.
                    try:
                        bibdoc_id = recordDocs.get_docid(bibdoc_name)
                    except:
                        raise StandardError("BibDoc of a name %s does not exist within a record" % (bibdoc_name, ))
            else:
                if bibdoc_name is not None:
                    write_message("Warning: both name and id of the first document of a relation have been specified. Ignoring the name")
            # In replace/correct modes (or when no new MoreInfo is supplied)
            # the existing MoreInfo entry is dropped first.
            if (moreinfo_str is None or mode in ("replace", "correct")) and (not pretend):
                MoreInfo(docid=bibdoc_id, version=bibdoc_ver,
                         docformat=bibdoc_fmt, relation=relation_id).delete()
            if (not moreinfo_str is None) and (not pretend):
                MoreInfo.create_from_serialised(moreinfo_str,
                                                docid=bibdoc_id,
                                                version=bibdoc_ver,
                                                docformat=bibdoc_fmt,
                                                relation=relation_id)
    return record
def elaborate_brt_tags(record, rec_id, mode, pretend=False, tmp_ids=None, tmp_vers=None):
    """
    Process BDR tags describing relations between existing objects

    @param record: the record structure being uploaded
    @param rec_id: identifier of the record the documents belong to
    @param mode: the bibupload mode (e.g. 'insert', 'replace', 'correct', ...)
    @param pretend: when True, do not modify the database
    @param tmp_ids: mapping of temporary document identifiers to real ones
    @param tmp_vers: mapping of temporary version identifiers to real ones
    @return: the (possibly unchanged) record
    """
    # FIX: tmp_ids/tmp_vers used to be mutable default arguments, silently
    # shared between successive calls; allocate fresh dictionaries instead.
    if tmp_ids is None:
        tmp_ids = {}
    if tmp_vers is None:
        tmp_vers = {}
    tuple_list = extract_tag_from_record(record, 'BDR')
    # Now gathering information from BDR tags - to be processed later
    relations_to_create = []
    write_message("Processing BDR entries of the record ")
    recordDocs = BibRecDocs(rec_id) #TODO: check what happens if there is no record yet ! Will the class represent an empty set?
    if tuple_list:
        for brt in record_get_field_instances(record, 'BDR', ' ', ' '):
            relation_id = _get_subfield_value(brt, "r")
            bibdoc1_id = None
            bibdoc1_name = None
            bibdoc1_ver = None
            bibdoc1_fmt = None
            bibdoc2_id = None
            bibdoc2_name = None
            bibdoc2_ver = None
            bibdoc2_fmt = None
            if not relation_id:
                # No explicit relation id: identify both documents instead.
                bibdoc1_id = _get_subfield_value(brt, "i")
                bibdoc1_name = _get_subfield_value(brt, "n")
                if bibdoc1_id is None:
                    if bibdoc1_name is None:
                        # FIX: typo "obejct" -> "object" in the error message.
                        raise StandardError("Incorrect relation. Neither name nor identifier of the first object has been specified")
                    else:
                        # retrieving the ID based on the document name (inside current record)
                        # The document is attached to current record.
                        try:
                            bibdoc1_id = recordDocs.get_docid(bibdoc1_name)
                        except:
                            raise StandardError("BibDoc of a name %s does not exist within a record" % \
                                                (bibdoc1_name, ))
                else:
                    # resolving temporary identifier
                    bibdoc1_id = resolve_identifier(tmp_ids, bibdoc1_id)
                    if bibdoc1_name is not None:
                        write_message("Warning: both name and id of the first document of a relation have been specified. Ignoring the name")
                bibdoc1_ver = _get_subfield_value(brt, "v")
                if not (bibdoc1_ver is None):
                    bibdoc1_ver = resolve_identifier(tmp_vers, bibdoc1_ver)
                bibdoc1_fmt = _get_subfield_value(brt, "f")
                bibdoc2_id = _get_subfield_value(brt, "j")
                bibdoc2_name = _get_subfield_value(brt, "o")
                if bibdoc2_id is None:
                    if bibdoc2_name is None:
                        raise StandardError("Incorrect relation. Neither name nor identifier of the second object has been specified")
                    else:
                        # retrieving the ID based on the document name (inside current record)
                        # The document is attached to current record.
                        try:
                            bibdoc2_id = recordDocs.get_docid(bibdoc2_name)
                        except:
                            raise StandardError("BibDoc of a name %s does not exist within a record" % (bibdoc2_name, ))
                else:
                    bibdoc2_id = resolve_identifier(tmp_ids, bibdoc2_id)
                    if bibdoc2_name is not None:
                        # FIX: the warning used to say "first document" although
                        # this branch concerns the second document of the relation.
                        write_message("Warning: both name and id of the second document of a relation have been specified. Ignoring the name")
                bibdoc2_ver = _get_subfield_value(brt, "w")
                if not (bibdoc2_ver is None):
                    bibdoc2_ver = resolve_identifier(tmp_vers, bibdoc2_ver)
                bibdoc2_fmt = _get_subfield_value(brt, "g")
            control_command = _get_subfield_value(brt, "d")
            relation_type = _get_subfield_value(brt, "t")
            if not relation_type and not relation_id:
                raise StandardError("The relation type must be specified")
            more_info = _get_subfield_value(brt, "m")
            # the relation id might be specified in the case of updating
            # MoreInfo table instead of other fields
            rel_obj = None
            if not relation_id:
                rels = BibRelation.get_relations(rel_type=relation_type,
                                                 bibdoc1_id=bibdoc1_id,
                                                 bibdoc2_id=bibdoc2_id,
                                                 bibdoc1_ver=bibdoc1_ver,
                                                 bibdoc2_ver=bibdoc2_ver,
                                                 bibdoc1_fmt=bibdoc1_fmt,
                                                 bibdoc2_fmt=bibdoc2_fmt)
                if len(rels) > 0:
                    rel_obj = rels[0]
                    relation_id = rel_obj.id
            else:
                rel_obj = BibRelation(rel_id=relation_id)
            relations_to_create.append((relation_id, bibdoc1_id, bibdoc1_ver,
                                        bibdoc1_fmt, bibdoc2_id, bibdoc2_ver,
                                        bibdoc2_fmt, relation_type, more_info,
                                        rel_obj, control_command))
    record_delete_field(record, 'BDR', ' ', ' ')
    if mode in ("insert", "replace_or_insert", "append", "correct", "replace"):
        # now creating relations between objects based on the data
        if not pretend:
            for (relation_id, bibdoc1_id, bibdoc1_ver, bibdoc1_fmt,
                 bibdoc2_id, bibdoc2_ver, bibdoc2_fmt, rel_type,
                 more_info, rel_obj, control_command) in relations_to_create:
                if rel_obj is None:
                    rel_obj = BibRelation.create(bibdoc1_id=bibdoc1_id,
                                                 bibdoc1_ver=bibdoc1_ver,
                                                 bibdoc1_fmt=bibdoc1_fmt,
                                                 bibdoc2_id=bibdoc2_id,
                                                 bibdoc2_ver=bibdoc2_ver,
                                                 bibdoc2_fmt=bibdoc2_fmt,
                                                 rel_type=rel_type)
                    relation_id = rel_obj.id
                # FIX: 'mode in ("replace")' was a substring test, since
                # ("replace") is just the string itself; use equality.
                if mode == "replace":
                    # Clearing existing MoreInfo content
                    rel_obj.get_more_info().delete()
                if more_info:
                    MoreInfo.create_from_serialised(more_info, relation=relation_id)
                if control_command == "DELETE":
                    rel_obj.delete()
    else:
        write_message("BDR tag is not processed in the %s mode" % (mode, ))
    return record
def elaborate_fft_tags(record, rec_id, mode, pretend=False,
tmp_ids = {}, tmp_vers = {}):
"""
Process FFT tags that should contain $a with file pathes or URLs
to get the fulltext from. This function enriches record with
proper 8564 URL tags, downloads fulltext files and stores them
into var/data structure where appropriate.
CFG_BIBUPLOAD_WGET_SLEEP_TIME defines time to sleep in seconds in
between URL downloads.
Note: if an FFT tag contains multiple $a subfields, we upload them
into different 856 URL tags in the metadata. See regression test
case test_multiple_fft_insert_via_http().
"""
# Let's define some handy sub procedure.
def _add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, modification_date, pretend=False):
"""Adds a new format for a given bibdoc. Returns True when everything's fine."""
write_message('Add new format to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s, modification_date: %s' % (repr(bibdoc), url, docformat, docname, doctype, newname, description, comment, flags, modification_date), verbose=9)
try:
if not url: # Not requesting a new url. Just updating comment & description
return _update_description_and_comment(bibdoc, docname, docformat, description, comment, flags, pretend=pretend)
try:
if not pretend:
bibdoc.add_file_new_format(url, description=description, comment=comment, flags=flags, modification_date=modification_date)
except StandardError, e:
write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because format already exists (%s)." % (url, docformat, docname, doctype, newname, description, comment, flags, modification_date, e), stream=sys.stderr)
raise
except Exception, e:
write_message("Error in adding '%s' as a new format because of: %s" % (url, e), stream=sys.stderr)
raise
return True
def _add_new_version(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, modification_date, pretend=False):
"""Adds a new version for a given bibdoc. Returns True when everything's fine."""
write_message('Add new version to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s' % (repr(bibdoc), url, docformat, docname, doctype, newname, description, comment, flags))
try:
if not url:
return _update_description_and_comment(bibdoc, docname, docformat, description, comment, flags, pretend=pretend)
try:
if not pretend:
bibdoc.add_file_new_version(url, description=description, comment=comment, flags=flags, modification_date=modification_date)
except StandardError, e:
write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because '%s'." % (url, docformat, docname, doctype, newname, description, comment, flags, modification_date, e), stream=sys.stderr)
raise
except Exception, e:
write_message("Error in adding '%s' as a new version because of: %s" % (url, e), stream=sys.stderr)
raise
return True
def _update_description_and_comment(bibdoc, docname, docformat, description, comment, flags, pretend=False):
"""Directly update comments and descriptions."""
write_message('Just updating description and comment for %s with format %s with description %s, comment %s and flags %s' % (docname, docformat, description, comment, flags), verbose=9)
try:
if not pretend:
bibdoc.set_description(description, docformat)
bibdoc.set_comment(comment, docformat)
for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
if flag in flags:
bibdoc.set_flag(flag, docformat)
else:
bibdoc.unset_flag(flag, docformat)
except StandardError, e:
write_message("('%s', '%s', '%s', '%s', '%s') description and comment not updated because '%s'." % (docname, docformat, description, comment, flags, e))
raise
return True
def _process_document_moreinfos(more_infos, docname, version, docformat, mode):
if not mode in ('correct', 'append', 'replace_or_insert', 'replace', 'correct', 'insert'):
print "exited because the mode is incorrect"
return
brd = BibRecDocs(rec_id)
docid = None
try:
docid = brd.get_docid(docname)
except:
raise StandardError("MoreInfo: No document of a given name associated with the record")
if not version:
# We have to retrieve the most recent version ...
version = brd.get_bibdoc(docname).get_latest_version()
doc_moreinfo_s, version_moreinfo_s, version_format_moreinfo_s, format_moreinfo_s = more_infos
if mode in ("replace", "replace_or_insert"):
if doc_moreinfo_s: #only if specified, otherwise do not touch
MoreInfo(docid = docid).delete()
if format_moreinfo_s: #only if specified... otherwise do not touch
MoreInfo(docid = docid, docformat = docformat).delete()
if not doc_moreinfo_s is None:
MoreInfo.create_from_serialised(ser_str = doc_moreinfo_s, docid = docid)
if not version_moreinfo_s is None:
MoreInfo.create_from_serialised(ser_str = version_moreinfo_s,
docid = docid, version = version)
if not version_format_moreinfo_s is None:
MoreInfo.create_from_serialised(ser_str = version_format_moreinfo_s,
docid = docid, version = version,
docformat = docformat)
if not format_moreinfo_s is None:
MoreInfo.create_from_serialised(ser_str = format_moreinfo_s,
docid = docid, docformat = docformat)
if mode == 'delete':
raise StandardError('FFT tag specified but bibupload executed in --delete mode')
tuple_list = extract_tag_from_record(record, 'FFT')
if tuple_list: # FFT Tags analysis
write_message("FFTs: "+str(tuple_list), verbose=9)
docs = {} # docnames and their data
for fft in record_get_field_instances(record, 'FFT', ' ', ' '):
# Very first, we retrieve the potentially temporary odentifiers...
#even if the rest fails, we should include them in teh dictionary
version = _get_subfield_value(fft, 'v', '')
# checking if version is temporary... if so, filling a different varaible
is_tmp_ver, bibdoc_tmpver = parse_identifier(version)
if is_tmp_ver:
version = None
else:
bibdoc_tmpver = None
if not version: #treating cases of empty string etc...
version = None
bibdoc_tmpid = field_get_subfield_values(fft, 'i')
if bibdoc_tmpid:
bibdoc_tmpid = bibdoc_tmpid[0]
else:
bibdoc_tmpid
is_tmp_id, bibdoc_tmpid = parse_identifier(bibdoc_tmpid)
if not is_tmp_id:
bibdoc_tmpid = None
# In the case of having temporary id's, we dont resolve them yet but signaklise that they have been used
# value -1 means that identifier has been declared but not assigned a value yet
if bibdoc_tmpid:
if bibdoc_tmpid in tmp_ids:
write_message("WARNING: the temporary identifier %s has been declared more than once. Ignoring the second occurance" % (bibdoc_tmpid, ))
else:
tmp_ids[bibdoc_tmpid] = -1
if bibdoc_tmpver:
if bibdoc_tmpver in tmp_vers:
write_message("WARNING: the temporary version identifier %s has been declared more than once. Ignoring the second occurance" % (bibdoc_tmpver, ))
else:
tmp_vers[bibdoc_tmpver] = -1
# Let's discover the type of the document
# This is a legacy field and will not be enforced any particular
# check on it.
doctype = _get_subfield_value(fft, 't', 'Main') #Default is Main
# Let's discover the url.
url = field_get_subfield_values(fft, 'a')
if url:
url = url[0]
try:
check_valid_url(url)
except StandardError, e:
raise StandardError, "fft '%s' specifies in $a a location ('%s') with problems: %s" % (fft, url, e)
else:
url = ''
#TODO: a lot of code can be compactified using similar syntax ... should be more readable on the longer scale
# maybe right side expressions look a bit cryptic, but the elaborate_fft function would be much clearer
if mode == 'correct' and doctype != 'FIX-MARC':
arg2 = ""
else:
arg2 = KEEP_OLD_VALUE
description = _get_subfield_value(fft, 'd', arg2)
# Let's discover the description
# description = field_get_subfield_values(fft, 'd')
# if description != []:
# description = description[0]
# else:
# if mode == 'correct' and doctype != 'FIX-MARC':
## If the user require to correct, and do not specify
## a description this means she really want to
## modify the description.
# description = ''
# else:
# description = KEEP_OLD_VALUE
# Let's discover the desired docname to be created/altered
name = field_get_subfield_values(fft, 'n')
if name:
## Let's remove undesired extensions
name = file_strip_ext(name[0] + '.pdf')
else:
if url:
name = get_docname_from_url(url)
elif mode != 'correct' and doctype != 'FIX-MARC':
raise StandardError, "Warning: fft '%s' doesn't specifies either a location in $a or a docname in $n" % str(fft)
else:
continue
# Let's discover the desired new docname in case we want to change it
newname = field_get_subfield_values(fft, 'm')
if newname:
newname = file_strip_ext(newname[0] + '.pdf')
else:
newname = name
# Let's discover the desired format
docformat = field_get_subfield_values(fft, 'f')
if docformat:
docformat = normalize_format(docformat[0])
else:
if url:
docformat = guess_format_from_url(url)
else:
docformat = ""
# Let's discover the icon
icon = field_get_subfield_values(fft, 'x')
if icon != []:
icon = icon[0]
if icon != KEEP_OLD_VALUE:
try:
check_valid_url(icon)
except StandardError, e:
raise StandardError, "fft '%s' specifies in $x an icon ('%s') with problems: %s" % (fft, icon, e)
else:
icon = ''
# Let's discover the comment
comment = field_get_subfield_values(fft, 'z')
if comment != []:
comment = comment[0]
else:
if mode == 'correct' and doctype != 'FIX-MARC':
## See comment on description
comment = ''
else:
comment = KEEP_OLD_VALUE
# Let's discover the restriction
restriction = field_get_subfield_values(fft, 'r')
if restriction != []:
restriction = restriction[0]
else:
if mode == 'correct' and doctype != 'FIX-MARC':
## See comment on description
restriction = ''
else:
restriction = KEEP_OLD_VALUE
document_moreinfo = _get_subfield_value(fft, 'w')
version_moreinfo = _get_subfield_value(fft, 'p')
version_format_moreinfo = _get_subfield_value(fft, 'b')
format_moreinfo = _get_subfield_value(fft, 'u')
# Let's discover the timestamp of the file (if any)
timestamp = field_get_subfield_values(fft, 's')
if timestamp:
try:
timestamp = datetime(*(time.strptime(timestamp[0], "%Y-%m-%d %H:%M:%S")[:6]))
except ValueError:
write_message('Warning: The timestamp is not in a good format, thus will be ignored. The format should be YYYY-MM-DD HH:MM:SS')
timestamp = ''
else:
timestamp = ''
flags = field_get_subfield_values(fft, 'o')
for flag in flags:
if flag not in CFG_BIBDOCFILE_AVAILABLE_FLAGS:
raise StandardError, "fft '%s' specifies a non available flag: %s" % (fft, flag)
if docs.has_key(name): # new format considered
(doctype2, newname2, restriction2, version2, urls, dummybibdoc_moreinfos2, dummybibdoc_tmpid2, dummybibdoc_tmpver2 ) = docs[name]
if doctype2 != doctype:
raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name)
if newname2 != newname:
raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name)
if restriction2 != restriction:
raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name)
if version2 != version:
raise StandardError, "fft '%s' specifies a different version than the previous fft with docname '%s'" % (str(fft), name)
for (dummyurl2, format2, dummydescription2, dummycomment2, dummyflags2, dummytimestamp2) in urls:
if docformat == format2:
raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, docformat, name)
if url or docformat:
urls.append((url, docformat, description, comment, flags, timestamp))
if icon:
urls.append((icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags, timestamp))
else:
if url or docformat:
docs[name] = (doctype, newname, restriction, version, [(url, docformat, description, comment, flags, timestamp)], [document_moreinfo, version_moreinfo, version_format_moreinfo, format_moreinfo], bibdoc_tmpid, bibdoc_tmpver)
if icon:
docs[name][4].append((icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags, timestamp))
elif icon:
docs[name] = (doctype, newname, restriction, version, [(icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags, timestamp)], [document_moreinfo, version_moreinfo, version_format_moreinfo, format_moreinfo], bibdoc_tmpid, bibdoc_tmpver)
else:
docs[name] = (doctype, newname, restriction, version, [], [document_moreinfo, version_moreinfo, version_format_moreinfo, format_moreinfo], bibdoc_tmpid, bibdoc_tmpver)
write_message('Result of FFT analysis:\n\tDocs: %s' % (docs,), verbose=9)
# Let's remove all FFT tags
record_delete_field(record, 'FFT', ' ', ' ')
# Preprocessed data elaboration
bibrecdocs = BibRecDocs(rec_id)
## Let's pre-download all the URLs to see if, in case of mode 'correct' or 'append'
## we can avoid creating a new revision.
for docname, (doctype, newname, restriction, version, urls, more_infos, bibdoc_tmpid, bibdoc_tmpver ) in docs.items():
downloaded_urls = []
try:
bibdoc = bibrecdocs.get_bibdoc(docname)
except InvenioBibDocFileError:
## A bibdoc with the given docname does not exists.
## So there is no chance we are going to revise an existing
## format with an identical file :-)
bibdoc = None
new_revision_needed = False
for url, docformat, description, comment, flags, timestamp in urls:
if url:
try:
downloaded_url = download_url(url, docformat)
write_message("%s saved into %s" % (url, downloaded_url), verbose=9)
except Exception, err:
write_message("Error in downloading '%s' because of: %s" % (url, err), stream=sys.stderr)
raise
if mode == 'correct' and bibdoc is not None and not new_revision_needed:
downloaded_urls.append((downloaded_url, docformat, description, comment, flags, timestamp))
if not bibrecdocs.check_file_exists(downloaded_url, docformat):
new_revision_needed = True
else:
write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr)
elif mode == 'append' and bibdoc is not None:
if not bibrecdocs.check_file_exists(downloaded_url, docformat):
downloaded_urls.append((downloaded_url, docformat, description, comment, flags, timestamp))
else:
write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr)
else:
downloaded_urls.append((downloaded_url, docformat, description, comment, flags, timestamp))
else:
downloaded_urls.append(('', docformat, description, comment, flags, timestamp))
if mode == 'correct' and bibdoc is not None and not new_revision_needed:
## Since we don't need a new revision (because all the files
## that are being uploaded are different)
## we can simply remove the urls but keep the other information
write_message("No need to add a new revision for docname %s for recid %s" % (docname, rec_id), verbose=2)
docs[docname] = (doctype, newname, restriction, version, [('', docformat, description, comment, flags, timestamp) for (dummy, docformat, description, comment, flags, timestamp) in downloaded_urls], more_infos, bibdoc_tmpid, bibdoc_tmpver)
for downloaded_url, dummy, dummy, dummy, dummy, dummy in downloaded_urls:
## Let's free up some space :-)
if downloaded_url and os.path.exists(downloaded_url):
os.remove(downloaded_url)
else:
if downloaded_urls or mode != 'append':
docs[docname] = (doctype, newname, restriction, version, downloaded_urls, more_infos, bibdoc_tmpid, bibdoc_tmpver)
else:
## In case we are in append mode and there are no urls to append
## we discard the whole FFT
del docs[docname]
if mode == 'replace': # First we erase previous bibdocs
if not pretend:
for bibdoc in bibrecdocs.list_bibdocs():
bibdoc.delete()
bibrecdocs.build_bibdoc_list()
for docname, (doctype, newname, restriction, version, urls, more_infos, bibdoc_tmpid, bibdoc_tmpver) in docs.iteritems():
write_message("Elaborating olddocname: '%s', newdocname: '%s', doctype: '%s', restriction: '%s', urls: '%s', mode: '%s'" % (docname, newname, doctype, restriction, urls, mode), verbose=9)
if mode in ('insert', 'replace'): # new bibdocs, new docnames, new marc
if newname in bibrecdocs.get_bibdoc_names():
write_message("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr)
raise StandardError("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr)
try:
if not pretend:
bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
bibdoc.set_status(restriction)
else:
bibdoc = None
except Exception, e:
write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
raise e
for (url, docformat, description, comment, flags, timestamp) in urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend))
elif mode == 'replace_or_insert': # to be thought as correct_or_insert
for bibdoc in bibrecdocs.list_bibdocs():
brd = BibRecDocs(rec_id)
dn = brd.get_docname(bibdoc.id)
if dn == docname:
if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
if newname != docname:
try:
if not pretend:
bibrecdocs.change_name(newname = newname, docid = bibdoc.id)
## Let's refresh the list of bibdocs.
bibrecdocs.build_bibdoc_list()
except StandardError, e:
write_message(e, stream=sys.stderr)
raise
found_bibdoc = False
for bibdoc in bibrecdocs.list_bibdocs():
brd = BibRecDocs(rec_id)
dn = brd.get_docname(bibdoc.id)
if dn == newname:
found_bibdoc = True
if doctype == 'PURGE':
if not pretend:
bibdoc.purge()
elif doctype == 'DELETE':
if not pretend:
bibdoc.delete()
elif doctype == 'EXPUNGE':
if not pretend:
bibdoc.expunge()
elif doctype == 'FIX-ALL':
if not pretend:
bibrecdocs.fix(docname)
elif doctype == 'FIX-MARC':
pass
elif doctype == 'DELETE-FILE':
if urls:
for (url, docformat, description, comment, flags, timestamp) in urls:
if not pretend:
bibdoc.delete_file(docformat, version)
elif doctype == 'REVERT':
try:
if not pretend:
bibdoc.revert(version)
except Exception, e:
write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr)
raise
else:
if restriction != KEEP_OLD_VALUE:
if not pretend:
bibdoc.set_status(restriction)
# Since the docname already existed we have to first
# bump the version by pushing the first new file
# then pushing the other files.
if urls:
(first_url, first_format, first_description, first_comment, first_flags, first_timestamp) = urls[0]
other_urls = urls[1:]
assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, first_timestamp, pretend=pretend))
for (url, docformat, description, comment, flags, timestamp) in other_urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend))
## Let's refresh the list of bibdocs.
bibrecdocs.build_bibdoc_list()
if not found_bibdoc:
if not pretend:
bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
bibdoc.set_status(restriction)
for (url, docformat, description, comment, flags, timestamp) in urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp))
elif mode == 'correct':
for bibdoc in bibrecdocs.list_bibdocs():
brd = BibRecDocs(rec_id)
dn = brd.get_docname(bibdoc.id)
if dn == docname:
if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
if newname != docname:
try:
if not pretend:
bibrecdocs.change_name(docid = bibdoc.id, newname=newname)
## Let's refresh the list of bibdocs.
bibrecdocs.build_bibdoc_list()
except StandardError, e:
write_message('Error in renaming %s to %s: %s' % (docname, newname, e), stream=sys.stderr)
raise
found_bibdoc = False
for bibdoc in bibrecdocs.list_bibdocs():
brd = BibRecDocs(rec_id)
dn = brd.get_docname(bibdoc.id)
if dn == newname:
found_bibdoc = True
if doctype == 'PURGE':
if not pretend:
bibdoc.purge()
elif doctype == 'DELETE':
if not pretend:
bibdoc.delete()
elif doctype == 'EXPUNGE':
if not pretend:
bibdoc.expunge()
elif doctype == 'FIX-ALL':
if not pretend:
bibrecdocs.fix(newname)
elif doctype == 'FIX-MARC':
pass
elif doctype == 'DELETE-FILE':
if urls:
for (url, docformat, description, comment, flags, timestamp) in urls:
if not pretend:
bibdoc.delete_file(docformat, version)
elif doctype == 'REVERT':
try:
if not pretend:
bibdoc.revert(version)
except Exception, e:
write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr)
raise
else:
if restriction != KEEP_OLD_VALUE:
if not pretend:
bibdoc.set_status(restriction)
if doctype and doctype!= KEEP_OLD_VALUE:
if not pretend:
bibdoc.change_doctype(doctype)
if urls:
(first_url, first_format, first_description, first_comment, first_flags, first_timestamp) = urls[0]
other_urls = urls[1:]
assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, first_timestamp, pretend=pretend))
for (url, docformat, description, comment, flags, timestamp) in other_urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend))
## Let's refresh the list of bibdocs.
bibrecdocs.build_bibdoc_list()
if not found_bibdoc:
if doctype in ('PURGE', 'DELETE', 'EXPUNGE', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE', 'REVERT'):
write_message("('%s', '%s', '%s') not performed because '%s' docname didn't existed." % (doctype, newname, urls, docname), stream=sys.stderr)
raise StandardError
else:
if not pretend:
bibdoc = bibrecdocs.add_bibdoc(doctype, newname)
bibdoc.set_status(restriction)
for (url, docformat, description, comment, flags, timestamp) in urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp))
elif mode == 'append':
try:
found_bibdoc = False
for bibdoc in bibrecdocs.list_bibdocs():
brd = BibRecDocs(rec_id)
dn = brd.get_docname(bibdoc.id)
if dn == docname:
found_bibdoc = True
for (url, docformat, description, comment, flags, timestamp) in urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend))
if not found_bibdoc:
try:
if not pretend:
bibdoc = bibrecdocs.add_bibdoc(doctype, docname)
bibdoc.set_status(restriction)
for (url, docformat, description, comment, flags, timestamp) in urls:
assert(_add_new_format(bibdoc, url, docformat, docname, doctype, newname, description, comment, flags, timestamp))
except Exception, e:
register_exception()
write_message("('%s', '%s', '%s') not appended because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
raise
except:
register_exception()
raise
if not pretend:
_process_document_moreinfos(more_infos, newname, version, urls and urls[0][1], mode)
# resolving temporary version and identifier
brd = BibRecDocs(rec_id)
if bibdoc_tmpid:
if bibdoc_tmpid in tmp_ids and tmp_ids[bibdoc_tmpid] != -1:
write_message("WARNING: the temporary identifier %s has been declared more than once. Ignoring the second occurance" % (bibdoc_tmpid, ))
else:
tmp_ids[bibdoc_tmpid] = brd.get_docid(docname)
if bibdoc_tmpver:
if bibdoc_tmpver in tmp_vers and tmp_vers[bibdoc_tmpver] != -1:
write_message("WARNING: the temporary version identifier %s has been declared more than once. Ignoring the second occurance" % (bibdoc_tmpver, ))
else:
if version == None:
if version:
tmp_vers[bibdoc_tmpver] = version
else:
tmp_vers[bibdoc_tmpver] = brd.get_bibdoc(docname).get_latest_version()
else:
tmp_vers[bibdoc_tmpver] = version
return record
### Update functions
def update_bibrec_date(now, bibrec_id, insert_mode_p, pretend=False):
    """Stamp the bibrec row BIBREC_ID with the date NOW.

    When INSERT_MODE_P is true both creation_date and modification_date
    are set; otherwise only modification_date is refreshed.  With
    PRETEND the SQL statement is skipped entirely.
    """
    if insert_mode_p:
        query, params = ("""UPDATE bibrec SET creation_date=%s, modification_date=%s WHERE id=%s""",
                         (now, now, bibrec_id))
    else:
        query, params = ("""UPDATE bibrec SET modification_date=%s WHERE id=%s""",
                         (now, bibrec_id))
    if not pretend:
        run_sql(query, params)
    write_message(" -Update record creation/modification date: DONE", verbose=2)
def update_bibfmt_format(id_bibrec, format_value, format_name, modification_date=None, pretend=False):
    """Update (or create) the FORMAT_NAME row of bibfmt for record ID_BIBREC.

    Returns 0 on success and 1 on failure.  With PRETEND no SQL is run.
    """
    # Normalise the timestamp: default to "now"; an unparsable caller
    # supplied value falls back to the Unix epoch.
    if modification_date is None:
        modification_date = time.strftime('%Y-%m-%d %H:%M:%S')
    else:
        try:
            time.strptime(modification_date, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            modification_date = '1970-01-01 00:00:00'
    # We check if the format is already in bibFmt
    nb_found = find_record_format(id_bibrec, format_name)
    if nb_found > 1:
        # Duplicate rows for the same (record, format) pair: refuse to guess.
        write_message(" Failed: Same format %s found several time in bibfmt for the same record." % format_name, verbose=1, stream=sys.stderr)
        return 1
    if nb_found == 1:
        # Exactly one row: overwrite it with the compressed new value.
        query = """UPDATE LOW_PRIORITY bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s"""
        params = (modification_date, compress(format_value), id_bibrec, format_name)
        if not pretend:
            row_id = run_sql(query, params)
            if row_id is None:
                write_message(" Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr)
                return 1
        write_message(" -Update the format %s in bibfmt: DONE" % format_name, verbose=2)
        return 0
    # No row yet: insert the format information in BibFMT.
    res = insert_bibfmt(id_bibrec, format_value, format_name, modification_date, pretend=pretend)
    if res is None:
        write_message(" Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr)
        return 1
    write_message(" -Insert the format %s in bibfmt: DONE" % format_name, verbose=2)
    return 0
def delete_bibfmt_format(id_bibrec, format_name, pretend=False):
    """
    Delete format FORMAT_NAME from bibfmt table for record ID_BIBREC.

    @param id_bibrec: identifier of the bibrec row whose format is dropped
    @param format_name: name of the cached format (e.g. 'xm', 'hb')
    @param pretend: when True, the DELETE statement is not executed
    @return: always 0
    """
    if not pretend:
        run_sql("DELETE LOW_PRIORITY FROM bibfmt WHERE id_bibrec=%s and format=%s", (id_bibrec, format_name))
    return 0
def archive_marcxml_for_history(recID, affected_fields, pretend=False):
    """
    Archive the current MARCXML ('xm') format of record RECID from the
    BIBFMT table into the hstRECORD table, to keep a MARCXML history of
    the record.  AFFECTED_FIELDS (tag -> list of indicator pairs) is
    flattened into a comma-separated pattern list stored alongside.

    Always returns 0.  With PRETEND nothing is inserted.
    """
    res = run_sql("SELECT id_bibrec, value, last_updated FROM bibfmt WHERE format='xm' AND id_bibrec=%s",
                  (recID,))
    db_affected_fields = ""
    if affected_fields:
        # Turn each digit-only tag into unique "tag+ind1+ind2+%" patterns,
        # with blank indicators shown as underscores.
        patterns = set()
        for field in affected_fields:
            if field.isdigit():  # hack for tags from RevisionVerifier
                for ind in affected_fields[field]:
                    patterns.add((field + ind[0] + ind[1] + "%").replace(" ", "_"))
            # non-digit keys: future implementation for fields
        db_affected_fields = ",".join(sorted(patterns))
    if res and not pretend:
        run_sql("""INSERT INTO hstRECORD (id_bibrec, marcxml, job_id, job_name, job_person, job_date, job_details, affected_fields)
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s)""",
                (res[0][0], res[0][1], task_get_task_param('task_id', 0), 'bibupload', task_get_task_param('user', 'UNKNOWN'), res[0][2],
                 'mode: ' + task_get_option('mode', 'UNKNOWN') + '; file: ' + task_get_option('file_path', 'UNKNOWN') + '.',
                 db_affected_fields))
    return 0
def update_database_with_metadata(record, rec_id, oai_rec_id="oai", affected_tags=None, pretend=False):
    """Update the database tables with the record and the record id given in parameter

    @param record: parsed record structure to persist into bibxxx tables
    @param rec_id: identifier of the bibrec row the metadata belongs to
    @param oai_rec_id: OAI identifier, used only for upload logging
    @param affected_tags: optional dict mapping tag -> list of (ind1, ind2)
        pairs; when given, only matching fields are (re)inserted
    @param pretend: when True, no SQL is executed
    """
    # extract only those tags that have been affected.
    # check happens at subfield level. This is to prevent overhead
    # associated with inserting already existing field with given ind pair
    write_message("update_database_with_metadata: record=%s, rec_id=%s, oai_rec_id=%s, affected_tags=%s" % (record, rec_id, oai_rec_id, affected_tags), verbose=9)
    tmp_record = {}
    if affected_tags:
        for tag in record.keys():
            if tag in affected_tags.keys():
                write_message(" -Tag %s found to be modified.Setting up for update" % tag, verbose=9)
                # initialize new list to hold affected field
                new_data_tuple_list = []
                for data_tuple in record[tag]:
                    ind1 = data_tuple[1]
                    ind2 = data_tuple[2]
                    # keep only fields whose indicator pair was flagged as modified
                    if (ind1, ind2) in affected_tags[tag]:
                        write_message(" -Indicator pair (%s, %s) added to update list" % (ind1, ind2), verbose=9)
                        new_data_tuple_list.append(data_tuple)
                tmp_record[tag] = new_data_tuple_list
        # the lambda defers the (potentially expensive) XML rendering until
        # write_message has decided the verbosity level requires it
        write_message(lambda: " -Modified fields: \n%s" % record_xml_output(tmp_record), verbose=2)
    else:
        # no filter supplied: process the whole record
        tmp_record = record
    for tag in tmp_record.keys():
        # check if tag is not a special one:
        if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS:
            # for each tag there is a list of tuples representing datafields
            tuple_list = tmp_record[tag]
            # this list should contain the elements of a full tag [tag, ind1, ind2, subfield_code]
            tag_list = []
            tag_list.append(tag)
            for single_tuple in tuple_list:
                # these are the contents of a single tuple
                subfield_list = single_tuple[0]
                ind1 = single_tuple[1]
                ind2 = single_tuple[2]
                # append the ind's to the full tag
                # (missing/blank indicators are stored as '_' in bibxxx)
                if ind1 == '' or ind1 == ' ':
                    tag_list.append('_')
                else:
                    tag_list.append(ind1)
                if ind2 == '' or ind2 == ' ':
                    tag_list.append('_')
                else:
                    tag_list.append(ind2)
                datafield_number = single_tuple[4]
                if tag in CFG_BIBUPLOAD_SPECIAL_TAGS:
                    # nothing to do for special tags (FFT, BDR, BDM)
                    pass
                elif tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001":
                    value = single_tuple[3]
                    # get the full tag
                    full_tag = ''.join(tag_list)
                    # update the tables
                    write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9)
                    # insert the tag and value into into bibxxx
                    (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend)
                    #print 'tname, bibrow', table_name, bibxxx_row_id;
                    if table_name is None or bibxxx_row_id is None:
                        write_message(" Failed: during insert_record_bibxxx", verbose=1, stream=sys.stderr)
                    # connect bibxxx and bibrec with the table bibrec_bibxxx
                    res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend)
                    if res is None:
                        write_message(" Failed: during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr)
                else:
                    # get the tag and value from the content of each subfield
                    for subfield in subfield_list:
                        subtag = subfield[0]
                        value = subfield[1]
                        tag_list.append(subtag)
                        # get the full tag
                        full_tag = ''.join(tag_list)
                        # update the tables
                        write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9)
                        # insert the tag and value into into bibxxx
                        (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend)
                        if table_name is None or bibxxx_row_id is None:
                            write_message(" Failed: during insert_record_bibxxx", verbose=1, stream=sys.stderr)
                        # connect bibxxx and bibrec with the table bibrec_bibxxx
                        res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend)
                        if res is None:
                            write_message(" Failed: during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr)
                        # remove the subtag from the list
                        tag_list.pop()
                # drop the two indicator characters before the next datafield
                tag_list.pop()
                tag_list.pop()
            # drop the tag itself (tag_list is rebuilt for the next tag anyway)
            tag_list.pop()
    write_message(" -Update the database with metadata: DONE", verbose=2)
    log_record_uploading(oai_rec_id, task_get_task_param('task_id', 0), rec_id, 'P', pretend=pretend)
def append_new_tag_to_old_record(record, rec_old):
    """Append new tags to an old record.

    Every field of RECORD (except the 001 controlfield) is added to
    REC_OLD in place; external OAI/SYSNO identifier fields are skipped
    when an identical instance already exists in REC_OLD.

    @param record: record structure whose fields are appended
    @param rec_old: record structure that receives the fields (mutated)
    @return: REC_OLD
    """
    def _append_tag(tag):
        # Append all instances of TAG from `record` into `rec_old`.
        if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS:
            if tag == '001':
                # never duplicate the record identifier
                pass
            else:
                # if it is a controlfield, just access the value
                for single_tuple in record[tag]:
                    controlfield_value = single_tuple[3]
                    # add the field to the old record
                    newfield_number = record_add_field(rec_old, tag,
                        controlfield_value=controlfield_value)
                    if newfield_number is None:
                        write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr)
        else:
            # For each tag there is a list of tuples representing datafields
            for single_tuple in record[tag]:
                # We retrieve the information of the tag
                subfield_list = single_tuple[0]
                ind1 = single_tuple[1]
                ind2 = single_tuple[2]
                # Normalise blank indicators to '_' and compare the 5-char
                # tag+indicators key against the configured external-id tags.
                if '%s%s%s' % (tag, ind1 == ' ' and '_' or ind1, ind2 == ' ' and '_' or ind2) in (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[:5]):
                    ## We don't want to append the external identifier
                    ## if it is already existing.
                    if record_find_field(rec_old, tag, single_tuple)[0] is not None:
                        write_message(" Not adding tag: %s ind1=%s ind2=%s subfields=%s: it's already there" % (tag, ind1, ind2, subfield_list), verbose=9)
                        continue
                # We add the datafield to the old record
                write_message(" Adding tag: %s ind1=%s ind2=%s subfields=%s" % (tag, ind1, ind2, subfield_list), verbose=9)
                newfield_number = record_add_field(rec_old, tag, ind1,
                    ind2, subfields=subfield_list)
                if newfield_number is None:
                    write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr)
    # Go through each tag in the appended record
    for tag in record:
        _append_tag(tag)
    return rec_old
def copy_strong_tags_from_old_record(record, rec_old):
    """
    Preserve "strong" tags across an upload: for every tag listed in
    CFG_BIBUPLOAD_STRONG_TAGS that has no instance in RECORD, copy all
    of its instances over from REC_OLD.  RECORD is modified in place.
    """
    for strong_tag in CFG_BIBUPLOAD_STRONG_TAGS:
        ind1 = strong_tag[3:4] or '%'
        ind2 = strong_tag[4:5] or '%'
        if record_get_field_instances(record, strong_tag, ind1, ind2):
            # The incoming record already carries this strong tag: keep it.
            continue
        for old_instance in record_get_field_instances(rec_old, strong_tag):
            sf_vals, fi_ind1, fi_ind2, controlfield, dummy = old_instance
            record_add_field(record, strong_tag, fi_ind1, fi_ind2, controlfield, sf_vals)
    return
### Delete functions
def delete_tags(record, rec_old):
    """
    Return a copy of REC_OLD with every field also present in RECORD removed.

    @param record: The record containing tags to delete.
    @type record: record structure
    @param rec_old: The original record.
    @type rec_old: record structure
    @return: The modified record.
    @rtype: record structure
    """
    result = copy.deepcopy(rec_old)
    for tag, fields in record.iteritems():
        # The record identifier is never removed.
        if tag == '001':
            continue
        for field in fields:
            position = record_find_field(result, tag, field)[1]
            if position is not None:
                record_delete_field(result, tag, field_position_local=position)
    return result
def delete_tags_to_correct(record, rec_old):
    """
    Delete tags from REC_OLD which are also existing in RECORD. When
    deleting, pay attention not only to tags, but also to indicators,
    so that fields with the same tags but different indicators are not
    deleted.

    REC_OLD is modified in place; nothing is returned.
    """
    ## Some fields are controlled via provenance information.
    ## We should re-add saved fields at the end.
    fields_to_readd = {}
    for tag in CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS:
        # each entry is a 6-char spec: 3-digit tag + ind1 + ind2 + provenance subfield code
        if tag[:3] in record:
            tmp_field_instances = record_get_field_instances(record, tag[:3], tag[3], tag[4]) ## Let's discover the provenance that will be updated
            provenances_to_update = []
            for instance in tmp_field_instances:
                for code, value in instance[0]:
                    if code == tag[5]:
                        if value not in provenances_to_update:
                            provenances_to_update.append(value)
                        break
                else:
                    # for/else: reached only when no subfield carried the
                    # provenance code, i.e. the loop ended without break.
                    ## The provenance is not specified.
                    ## let's add the special empty provenance.
                    if '' not in provenances_to_update:
                        provenances_to_update.append('')
            potential_fields_to_readd = record_get_field_instances(rec_old, tag[:3], tag[3], tag[4]) ## Let's take all the field corresponding to tag
            ## Let's save apart all the fields that should be updated, but
            ## since they have a different provenance not mentioned in record
            ## they should be preserved.
            fields = []
            for sf_vals, ind1, ind2, dummy_cf, dummy_line in potential_fields_to_readd:
                for code, value in sf_vals:
                    if code == tag[5]:
                        if value not in provenances_to_update:
                            fields.append(sf_vals)
                        break
                else:
                    # no provenance subfield on this old field
                    if '' not in provenances_to_update:
                        ## Empty provenance, let's protect in any case
                        fields.append(sf_vals)
            fields_to_readd[tag] = fields
    # browse through all the tags from the MARCXML file:
    for tag in record:
        # check if the tag exists in the old record too:
        if tag in rec_old and tag != '001':
            # the tag does exist, so delete all record's tag+ind1+ind2 combinations from rec_old
            for dummy_sf_vals, ind1, ind2, dummy_cf, dummyfield_number in record[tag]:
                write_message(" Delete tag: " + tag + " ind1=" + ind1 + " ind2=" + ind2, verbose=9)
                record_delete_field(rec_old, tag, ind1, ind2)
    ## Ok, we readd necessary fields!
    for tag, fields in fields_to_readd.iteritems():
        for sf_vals in fields:
            write_message(" Adding tag: " + tag[:3] + " ind1=" + tag[3] + " ind2=" + tag[4] + " code=" + str(sf_vals), verbose=9)
            record_add_field(rec_old, tag[:3], tag[3], tag[4], subfields=sf_vals)
def delete_bibrec_bibxxx(record, id_bibrec, affected_tags=None, pretend=False):
    """Delete the database record from the table bibxxx given in parameters.

    Clears the bibrec_bibxxx linking rows of record ID_BIBREC for every
    non-special tag listed in AFFECTED_TAGS, honouring the indicator
    pairs so that only the modified tag+ind1+ind2 combinations are
    removed.

    @param record: the record structure (currently only used for logging)
    @param id_bibrec: identifier of the bibrec row to clean
    @param affected_tags: dict mapping tag -> list of (ind1, ind2) pairs;
        defaults to an empty dict (nothing deleted).  The former shared
        mutable `{}` default was replaced by the None sentinel to avoid
        the mutable-default-argument pitfall.
    @param pretend: when True, the DELETE statements are not executed
    """
    if affected_tags is None:
        affected_tags = {}
    # we clear all the rows from bibrec_bibxxx from the old record
    # clearing only those tags that have been modified.
    write_message(lambda: "delete_bibrec_bibxxx(record=%s, id_bibrec=%s, affected_tags=%s)" % (record, id_bibrec, affected_tags), verbose=9)
    for tag in affected_tags:
        # sanity check with record keys just to make sure its fine.
        if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS:
            write_message("%s found in record"%tag, verbose=2)
            # for each name construct the bibrec_bibxxx table name
            table_name = 'bib'+tag[0:2]+'x'
            bibrec_table = 'bibrec_'+table_name
            # delete all the records with proper id_bibrec. Indicators matter for individual affected tags
            tmp_ind_1 = ''
            tmp_ind_2 = ''
            # construct exact tag value using indicators
            for ind_pair in affected_tags[tag]:
                if ind_pair[0] == ' ':
                    tmp_ind_1 = '_'
                else:
                    tmp_ind_1 = ind_pair[0]
                if ind_pair[1] == ' ':
                    tmp_ind_2 = '_'
                else:
                    tmp_ind_2 = ind_pair[1]
                # need to escape incase of underscore so that mysql treats it as a char
                tag_val = tag+"\\"+tmp_ind_1+"\\"+tmp_ind_2 + '%'
                query = """DELETE br.* FROM `%s` br,`%s` b where br.id_bibrec=%%s and br.id_bibxxx=b.id and b.tag like %%s""" % (bibrec_table, table_name)
                params = (id_bibrec, tag_val)
                write_message(query % params, verbose=9)
                if not pretend:
                    run_sql(query, params)
        else:
            write_message("%s not found"%tag, verbose=2)
def main():
    """Main that construct all the bibtask.

    Declares the bibupload CLI surface (modes, flags, callback options)
    and wires the option parser and the task body into the BibTask
    framework; the actual work happens in task_run_core().
    """
    task_init(authorization_action='runbibupload',
              authorization_msg="BibUpload Task Submission",
              description="""Receive MARC XML file and update appropriate database
tables according to options.
Examples:
    $ bibupload -i input.xml
""",
              help_specific_usage=""" -a, --append\t\tnew fields are appended to the existing record
-c, --correct\t\tfields are replaced by the new ones in the existing record, except
\t\t\twhen overridden by CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS
-i, --insert\t\tinsert the new record in the database
-r, --replace\t\tthe existing record is entirely replaced by the new one,
\t\t\texcept for fields in CFG_BIBUPLOAD_STRONG_TAGS
-d, --delete\t\tspecified fields are deleted in existing record
-n, --notimechange\tdo not change record last modification date when updating
-o, --holdingpen\tInsert record into holding pen instead of the normal database
--pretend\t\tdo not really insert/append/correct/replace the input file
--force\t\twhen --replace, use provided 001 tag values, even if the matching
\t\t\trecord does not exist (thus allocating it on-the-fly)
--callback-url\tSend via a POST request a JSON-serialized answer (see admin guide), in
\t\t\torder to provide a feedback to an external service about the outcome of the operation.
--nonce\t\twhen used together with --callback add the nonce value in the JSON message.
--special-treatment=MODE\tif "oracle" is specified, when used together with --callback_url,
\t\t\tPOST an application/x-www-form-urlencoded request where the JSON message is encoded
\t\t\tinside a form field called "results".
""",
              version=__revision__,
              # "ircazdnoS:" are the short options; "S:" (--stage) is kept
              # only for backward compatibility and is ignored.
              specific_params=("ircazdnoS:",
                               [
                                   "insert",
                                   "replace",
                                   "correct",
                                   "append",
                                   "reference",
                                   "delete",
                                   "notimechange",
                                   "holdingpen",
                                   "pretend",
                                   "force",
                                   "callback-url=",
                                   "nonce=",
                                   "special-treatment=",
                                   "stage=",
                               ]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def task_submit_elaborate_specific_parameter(key, value, opts, args): # pylint: disable=W0613
    """ Given the string key it checks it's meaning, eventually using the
    value. Usually it fills some key in the options dict.
    It must return True if it has elaborated the key, False, if it doesn't
    know that key.
    eg:
    if key in ['-n', '--number']:
        task_get_option(\1) = value
    return True
    return False

    Note: every mode option additionally records args[0] (the input
    MARCXML file) as the 'file_path' option, normalised to an absolute
    path after fix_argv_paths().
    """
    # No time change option
    if key in ("-n", "--notimechange"):
        task_set_option('notimechange', 1)
    # Insert mode option
    elif key in ("-i", "--insert"):
        if task_get_option('mode') == 'replace':
            # if also replace found, then set to replace_or_insert
            task_set_option('mode', 'replace_or_insert')
        else:
            task_set_option('mode', 'insert')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Replace mode option
    elif key in ("-r", "--replace"):
        if task_get_option('mode') == 'insert':
            # if also insert found, then set to replace_or_insert
            task_set_option('mode', 'replace_or_insert')
        else:
            task_set_option('mode', 'replace')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Holding pen mode option
    elif key in ("-o", "--holdingpen"):
        write_message("Holding pen mode", verbose=3)
        task_set_option('mode', 'holdingpen')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Correct mode option
    elif key in ("-c", "--correct"):
        task_set_option('mode', 'correct')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Append mode option
    elif key in ("-a", "--append"):
        task_set_option('mode', 'append')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Deprecated reference mode option (now correct)
    elif key in ("-z", "--reference"):
        task_set_option('mode', 'correct')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Delete mode option
    elif key in ("-d", "--delete"):
        task_set_option('mode', 'delete')
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Dry-run option
    elif key in ("--pretend",):
        task_set_option('pretend', True)
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Allocate provided 001 on-the-fly during --replace
    elif key in ("--force",):
        task_set_option('force', True)
        fix_argv_paths([args[0]])
        task_set_option('file_path', os.path.abspath(args[0]))
    # Feedback callback options
    elif key in ("--callback-url", ):
        task_set_option('callback_url', value)
    elif key in ("--nonce", ):
        task_set_option('nonce', value)
    elif key in ("--special-treatment", ):
        # Only values from the allowed list are accepted; currently only
        # 'oracle' triggers a behaviour change (form-encoded callback).
        if value.lower() in CFG_BIBUPLOAD_ALLOWED_SPECIAL_TREATMENTS:
            if value.lower() == 'oracle':
                task_set_option('oracle_friendly', True)
        else:
            print >> sys.stderr, """The specified value is not in the list of allowed special treatments codes: %s""" % CFG_BIBUPLOAD_ALLOWED_SPECIAL_TREATMENTS
            return False
    elif key in ("-S", "--stage"):
        # Accepted for backward compatibility, but ignored.
        print >> sys.stderr, """WARNING: the --stage parameter is deprecated and ignored."""
    else:
        return False
    return True
def task_submit_check_options():
    """Validate the CLI options before the task is submitted.

    Returns False (blocking submission) when no update/insert mode or
    no input file was provided; True otherwise.
    """
    required_options = (
        ('mode', "Please specify at least one update/insert mode!"),
        ('file_path', "Missing filename! -h for help."),
    )
    for option_name, complaint in required_options:
        if task_get_option(option_name) is None:
            write_message(complaint)
            return False
    return True
def writing_rights_p():
    """Return True in case bibupload has the proper rights to write in the
    fulltext file folder.

    Reuses the module-level _WRITING_RIGHTS cache when already probed.
    The probe creates a temporary file inside CFG_BIBDOCFILE_FILEDIR,
    writes a marker, reads it back and removes the file; any failure
    (missing permissions, full disk, ...) is registered for the admin
    and reported as False.
    """
    if _WRITING_RIGHTS is not None:
        return _WRITING_RIGHTS
    try:
        if not os.path.exists(CFG_BIBDOCFILE_FILEDIR):
            os.makedirs(CFG_BIBDOCFILE_FILEDIR)
        fd, filename = tempfile.mkstemp(suffix='.txt', prefix='test', dir=CFG_BIBDOCFILE_FILEDIR)
        test = os.fdopen(fd, 'w')
        test.write('TEST')
        test.close()
        # Read the marker back through an explicitly closed handle (the
        # previous code left the file object dangling open).
        readback = open(filename)
        try:
            content = readback.read()
        finally:
            readback.close()
        if content != 'TEST':
            raise IOError("Can not successfully write and readback %s" % filename)
        os.remove(filename)
    except:
        # Deliberately broad: any failure here means "no writing rights".
        register_exception(alert_admin=True)
        return False
    return True
def post_results_to_callback_url(results, callback_url):
    """POST the JSON-serialized RESULTS dict to CALLBACK_URL.

    Silently returns (after a warning) when no JSON module is available.
    Only http and https callback URLs are supported; anything else
    raises ValueError.  Returns the urllib2 response object.
    """
    write_message("Sending feedback to %s" % callback_url)
    if not CFG_JSON_AVAILABLE:
        from warnings import warn
        warn("--callback-url used but simplejson/json not available")
        return
    json_results = json.dumps(results)
    write_message("Message to send: %s" % json_results, verbose=9)
    ## <scheme>://<netloc>/<path>?<query>#<fragment>
    scheme, dummynetloc, dummypath, dummyquery, dummyfragment = urlparse.urlsplit(callback_url)
    ## See: http://stackoverflow.com/questions/111945/is-there-any-way-to-do-http-put-in-python
    if scheme == 'http':
        opener = urllib2.build_opener(urllib2.HTTPHandler)
    elif scheme == 'https':
        opener = urllib2.build_opener(urllib2.HTTPSHandler)
    else:
        raise ValueError("Scheme not handled %s for callback_url %s" % (scheme, callback_url))
    if task_get_option('oracle_friendly'):
        # Oracle-friendly mode (--special-treatment=oracle): wrap the JSON
        # payload in a form-encoded "results" field instead of a raw body.
        write_message("Oracle friendly mode requested", verbose=9)
        request = urllib2.Request(callback_url, data=urllib.urlencode({'results': json_results}))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    else:
        request = urllib2.Request(callback_url, data=json_results)
        request.add_header('Content-Type', 'application/json')
    request.add_header('User-Agent', make_user_agent_string('BibUpload'))
    write_message("Headers about to be sent: %s" % request.headers, verbose=9)
    write_message("Data about to be sent: %s" % request.data, verbose=9)
    res = opener.open(request)
    msg = res.read()
    write_message("Result of posting the feedback: %s %s" % (res.code, res.msg), verbose=9)
    write_message("Returned message is: %s" % msg, verbose=9)
    return res
def bibupload_records(records, opt_mode=None, opt_notimechange=0,
                      pretend=False, callback_url=None, results_for_callback=None):
    """perform the task of uploading a set of records
    returns list of (error_code, recid) tuples for separate records

    @param records: list of parsed record structures to upload
    @param opt_mode: upload mode (insert/replace/correct/append/delete/
        holdingpen); 'reference' is a deprecated alias of 'correct'
    @param opt_notimechange: when non-zero, keep the record modification date
    @param pretend: when True, no real database change is performed
    @param callback_url: when set, per-record outcomes are accumulated
        into results_for_callback for a later POST
    @param results_for_callback: dict holding a 'results' list (mutated here)
    """
    #Dictionaries maintaining temporary identifiers
    # Structure: identifier -> number
    tmp_ids = {}
    tmp_vers = {}
    results = []
    # The first phase -> assigning meaning to temporary identifiers
    if opt_mode == 'reference':
        ## NOTE: reference mode has been deprecated in favour of 'correct'
        opt_mode = 'correct'
    record = None
    for record in records:
        record_id = record_extract_oai_id(record)
        task_sleep_now_if_required(can_stop_too=True)
        if opt_mode == "holdingpen":
            #inserting into the holding pen
            write_message("Inserting into holding pen", verbose=3)
            insert_record_into_holding_pen(record, record_id, pretend=pretend)
        else:
            write_message("Inserting into main database", verbose=3)
            error = bibupload(
                record,
                opt_mode = opt_mode,
                opt_notimechange = opt_notimechange,
                oai_rec_id = record_id,
                pretend = pretend,
                tmp_ids = tmp_ids,
                tmp_vers = tmp_vers)
            results.append(error)
            # error[0]: 1 = hard failure, 2 = sent to holding pen,
            # 0 = success, anything else = other failure
            if error[0] == 1:
                if record:
                    # the lambda defers XML serialization until
                    # write_message decides it is actually needed
                    write_message(lambda: record_xml_output(record),
                                  stream=sys.stderr)
                else:
                    write_message("Record could not have been parsed",
                                  stream=sys.stderr)
                stat['nb_errors'] += 1
                if callback_url:
                    results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]})
            elif error[0] == 2:
                if record:
                    write_message(lambda: record_xml_output(record),
                                  stream=sys.stderr)
                else:
                    write_message("Record could not have been parsed",
                                  stream=sys.stderr)
                stat['nb_holdingpen'] += 1
                if callback_url:
                    results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]})
            elif error[0] == 0:
                if callback_url:
                    from invenio.search_engine import print_record
                    results_for_callback['results'].append({'recid': error[1], 'success': True, "marcxml": print_record(error[1], 'xm'), 'url': "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, error[1])})
            else:
                if callback_url:
                    results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]})
            # stat is a global variable
            task_update_progress("Done %d out of %d." % \
                                 (stat['nb_records_inserted'] + \
                                  stat['nb_records_updated'],
                                  stat['nb_records_to_upload']))
    # Second phase -> Now we can process all entries where temporary identifiers might appear (BDR, BDM)
    write_message("Identifiers table after processing: %s versions: %s" % (str(tmp_ids), str(tmp_vers)))
    write_message("Uploading BDR and BDM fields")
    if opt_mode != "holdingpen":
        for record in records:
            record_id = retrieve_rec_id(record, opt_mode, pretend=pretend, post_phase = True)
            bibupload_post_phase(record,
                                 rec_id = record_id,
                                 mode = opt_mode,
                                 pretend = pretend,
                                 tmp_ids = tmp_ids,
                                 tmp_vers = tmp_vers)
    return results
def task_run_core():
    """Run the body of the bibupload task.

    Reads the MARCXML file given by the 'file_path' task option, uploads every
    record found in it via bibupload_records(), optionally posts the per-record
    outcomes to the 'callback_url' task option, and prints statistics.

    :return: True when no record-level errors were recorded, False otherwise.
    """
    write_message("Input file '%s', input mode '%s'." %
                  (task_get_option('file_path'), task_get_option('mode')))
    write_message("STAGE 0:", verbose=2)
    if task_get_option('file_path') is not None:
        write_message("start processing", verbose=3)
        task_update_progress("Reading XML input")
        recs = xml_marc_to_records(open_marc_file(task_get_option('file_path')))
        stat['nb_records_to_upload'] = len(recs)
        write_message("   -Open XML marc: DONE", verbose=2)
        task_sleep_now_if_required(can_stop_too=True)
        write_message("Entering records loop", verbose=3)
        callback_url = task_get_option('callback_url')
        results_for_callback = {'results': []}
        if recs is not None:
            # We proceed record by record.
            bibupload_records(records=recs, opt_mode=task_get_option('mode'),
                              opt_notimechange=task_get_option('notimechange'),
                              pretend=task_get_option('pretend'),
                              callback_url=callback_url,
                              results_for_callback=results_for_callback)
        else:
            write_message("   Error bibupload failed: No record found",
                          verbose=1, stream=sys.stderr)
        callback_url = task_get_option("callback_url")
        if callback_url:
            # Echo the caller-supplied nonce back so the callback receiver
            # can correlate this answer with its original request.
            nonce = task_get_option("nonce")
            if nonce:
                results_for_callback["nonce"] = nonce
            post_results_to_callback_url(results_for_callback, callback_url)
    if task_get_task_param('verbose') >= 1:
        # Print out the statistics
        print_out_bibupload_statistics()
    # The task succeeded iff no errors were accumulated (stat is a global).
    return stat['nb_errors'] == 0
def log_record_uploading(oai_rec_id, task_id, bibrec_id, insertion_db, pretend=False):
    """Mark the oaiHARVESTLOG entry for OAI_REC_ID as inserted.

    Updates the oldest matching harvest-log row (by date_harvested) with the
    insertion timestamp, the target database and the bibrec id.  Does nothing
    when OAI_REC_ID is empty/None or when PRETEND is True.
    """
    # Guard clause: nothing to log without an OAI identifier.
    if oai_rec_id == "" or oai_rec_id == None:
        return
    update_query = """UPDATE oaiHARVESTLOG SET date_inserted=NOW(), inserted_to_db=%s, id_bibrec=%s WHERE oai_id = %s AND bibupload_task_id = %s ORDER BY date_harvested LIMIT 1"""
    if not pretend:
        run_sql(update_query, (str(insertion_db), str(bibrec_id), str(oai_rec_id), str(task_id), ))
# Script entry point: delegate to the task runner when invoked directly.
if __name__ == "__main__":
    main()
diff --git a/invenio/legacy/errorlib/webinterface.py b/invenio/legacy/errorlib/webinterface.py
index 8ed858dc3..1f4bd90e5 100644
--- a/invenio/legacy/errorlib/webinterface.py
+++ b/invenio/legacy/errorlib/webinterface.py
@@ -1,107 +1,107 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=C0301
"""ErrorLib web interface."""
__revision__ = "$Id$"
__lastupdated__ = "$Date$"
from invenio.config import CFG_SITE_URL
from invenio.webpage import page
-from invenio.errorlib import send_error_report_to_admin
+from invenio.ext.logging import send_error_report_to_admin
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.url import redirect_to_url
from invenio.base.i18n import gettext_set_language
class WebInterfaceErrorPages(WebInterfaceDirectory):
    """Defines the set of /error pages."""

    # URLs exported below /error: '' (index) and 'send'.
    _exports = ['', 'send']

    def index(self, req, form):
        """Index page: redirect straight to the error-report form."""
        redirect_to_url(req, '%s/error/send' % CFG_SITE_URL)

    def send(self, req, form):
        """
        Confirmation page of error report sent to the admin.

        Parameters are the same as used for the error box; see
        webstyle_templates.tmpl_error_box.  Every parameter defaults to
        "NA", which is treated as "missing" below.
        """
        argd = wash_urlargd(form, {'header': (str, "NA"),
                                   'url': (str, "NA"),
                                   'time': (str, "NA"),
                                   'browser': (str, "NA"),
                                   'client': (str, "NA"),
                                   'error': (str, "NA"),
                                   'sys_error': (str, "NA"),
                                   'traceback': (str, "NA"),
                                   'referer': (str, "NA"),
                                   })
        # NOTE(review): 'ln' is not declared in the dict above; presumably
        # wash_urlargd() adds it by default -- confirm against its code.
        _ = gettext_set_language(argd['ln'])
        # Refuse to send the report if any mandatory parameter is missing.
        if argd['client'] == "NA":
            return page(title=_("Sorry"),
                        body=_("Cannot send error request, %s parameter missing.") % 'client',
                        lastupdated=__lastupdated__,
                        req=req)
        elif argd['url'] == "NA":
            return page(title=_("Sorry"),
                        body=_("Cannot send error request, %s parameter missing.") % 'url',
                        lastupdated=__lastupdated__,
                        req=req)
        elif argd['time'] == "NA":
            return page(title=_("Sorry"),
                        body=_("Cannot send error request, %s parameter missing.") % 'time',
                        lastupdated=__lastupdated__,
                        req=req)
        elif argd['error'] == "NA":
            return page(title=_("Sorry"),
                        body=_("Cannot send error request, %s parameter missing.") % 'error',
                        lastupdated=__lastupdated__,
                        req=req)
        else:
            # All mandatory fields present: mail the report to the admin
            # and show a thank-you page.
            send_error_report_to_admin(argd['header'],
                                       argd['url'],
                                       argd['time'],
                                       argd['browser'],
                                       argd['client'],
                                       argd['error'],
                                       argd['sys_error'],
                                       argd['traceback'])
            out = """
            <p><span class="exampleleader">%(title)s</span>
            <p>%(message)s
            <p>%(back)s
            """ % \
            {'title' : _("The error report has been sent."),
             'message' : _("Many thanks for helping us to improve the service."),
             'back' : argd['referer']!="NA" and "<a href=\"%s\">back</a>" % (argd['referer'],) or \
                      _("Use the back button of your browser to return to the previous page.")
            }
            return page(title=_("Thank you!"),
                        body=out,
                        lastupdated=__lastupdated__,
                        req=req)
diff --git a/invenio/legacy/external_authentication/cern.py b/invenio/legacy/external_authentication/cern.py
index 631aaad9d..c7e2e5193 100644
--- a/invenio/legacy/external_authentication/cern.py
+++ b/invenio/legacy/external_authentication/cern.py
@@ -1,196 +1,196 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""External user authentication for CERN NICE/CRA Invenio."""
__revision__ = \
"$Id$"
import httplib
import socket
import re
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.external_authentication import ExternalAuth, \
InvenioWebAccessExternalAuthError
from invenio.external_authentication_cern_wrapper import AuthCernWrapper
# Tunable list of settings to be hidden: these NICE account fields are
# prefixed with 'HIDDEN_' by ExternalAuthCern.fetch_user_preferences().
CFG_EXTERNAL_AUTH_CERN_HIDDEN_SETTINGS = ['auth', 'respccid', 'ccid']
# Tunable list of groups to be hidden: administrative/technical NICE
# groups filtered out of the group membership reported to Invenio.
CFG_EXTERNAL_AUTH_HIDDEN_GROUPS = (
    'All Exchange People',
    'CERN Users',
    'cern-computing-postmasters@cern.ch',
    'cern-nice2000-postmasters@cern.ch',
    'CMF FrontEnd Users',
    'CMF_NSC_259_NSU',
    'Domain Users',
    'GP Apply Favorites Redirection',
    'GP Apply NoAdmin',
    'info-terminalservices@cern.ch',
    'info-terminalservices-members@cern.ch',
    'IT Web IT',
    'NICE Deny Enforce Password-protected Screensaver',
    'NICE Enforce Password-protected Screensaver',
    'NICE LightWeight Authentication WS Users',
    'NICE MyDocuments Redirection (New)',
    'NICE Profile Redirection',
    'NICE Terminal Services Users',
    'NICE Users',
    'NICE VPN Users',
)
# Groups whose name matches any of these patterns are hidden as well.
CFG_EXTERNAL_AUTH_HIDDEN_GROUPS_RE = (
    re.compile(r'Users by Letter [A-Z]'),
    re.compile(r'building-[\d]+'),
    re.compile(r'Users by Home CERNHOME[A-Z]'),
)
class ExternalAuthCern(ExternalAuth):
    """
    External authentication example for a custom HTTPS-based
    authentication service (called "CERN NICE").
    """

    def __init__(self):
        """Open the connection to the CERN NICE/CRA webservice.

        :raise InvenioWebAccessExternalAuthError: when no connection to the
            webservice can be established.
        """
        ExternalAuth.__init__(self)
        try:
            self.connection = AuthCernWrapper()
        except (httplib.CannotSendRequest, socket.error, AttributeError,
                IOError, TypeError) as msg:  # Let the user note that
                                             # no connection is available
            register_exception(alert_admin=True)
            raise InvenioWebAccessExternalAuthError(msg)

    def _try_twice(self, funct, params):
        """Try twice to execute FUNCT on self.connection passing it PARAMS.

        If for various reasons the connection doesn't work it's restarted
        once; a second failure raises InvenioWebAccessExternalAuthError.
        """
        try:
            ret = funct(self.connection, **params)
        except (httplib.CannotSendRequest, socket.error, AttributeError,
                IOError, TypeError):
            try:
                # First attempt failed: re-open the connection and retry once.
                self.connection = AuthCernWrapper()
                ret = funct(self.connection, **params)
            except (httplib.CannotSendRequest, socket.error, AttributeError,
                    IOError, TypeError):
                register_exception(alert_admin=True)
                self.connection = None
                raise InvenioWebAccessExternalAuthError
        return ret

    def auth_user(self, username, password, req=None):
        """
        Check USERNAME and PASSWORD against CERN NICE/CRA database.
        Return (None, None) if authentication failed, or the
        (email address, nickname) of the
        person if the authentication was successful. In order to do
        this you may perhaps have to keep a translation table between
        usernames and email addresses.
        If it is the first time the user logs in Invenio the nickname is
        stored alongside the email. If this nickname is unfortunately already
        in use it is discarded. Otherwise it is ignored.
        Raise InvenioWebAccessExternalAuthError in case of external troubles.
        """
        infos = self._try_twice(funct=AuthCernWrapper.get_user_info,
                                params={"user_name": username, "password": password})
        if "email" in infos:
            return infos["email"], infos["login"]
        else:
            return None, None

    def user_exists(self, email, req=None):
        """Checks against CERN NICE/CRA for existence of email.
        @return: True if the user exists, False otherwise
        """
        users = self._try_twice(funct=AuthCernWrapper.list_users,
                                params={"display_name": email})
        # Case-insensitive comparison: NICE may report any casing.
        return email.upper() in [user['email'].upper() for user in users]

    def fetch_user_groups_membership(self, email, password=None, req=None):
        """Fetch user groups membership from the CERN NICE/CRA account.
        @return: a dictionary of groupname, group description
        """
        groups = self._try_twice(funct=AuthCernWrapper.get_groups_for_user,
                                 params={"user_name": email})
        # Filtering out uncomfortable groups: exact blacklist first, then the
        # regexp-based one.  NOTE: the previous implementation called
        # groups.remove() while iterating over the same list, which silently
        # skipped the element following each removed group.
        groups = [group for group in groups
                  if group not in CFG_EXTERNAL_AUTH_HIDDEN_GROUPS
                  and not any(regexp.match(group)
                              for regexp in CFG_EXTERNAL_AUTH_HIDDEN_GROUPS_RE)]
        # Produce list of double value: group/mailing list(with stripped
        # @cern.ch) name, and group/description built from the name.
        return dict(map(lambda x: (x.find('@') > -1 and x[:x.find('@')] or x,
                                   '@' in x and x + ' (CERN Mailing list)'
                                   or x + ' (CERN Group)'), groups))

    def fetch_user_nickname(self, username, password, req=None):
        """Given a username and a password, returns the right nickname
        belonging to that user (username could be an email).
        """
        infos = self._try_twice(funct=AuthCernWrapper.get_user_info,
                                params={"user_name": username, "password": password})
        if "login" in infos:
            return infos["login"]
        else:
            return None

    def fetch_user_preferences(self, username, password=None, req=None):
        """Fetch user preferences/settings from the CERN Nice account.
        the external key will be '1' if the account is external to NICE/CRA,
        otherwise 0
        @return: a dictionary. Note: auth and respccid are hidden
        """
        prefs = self._try_twice(funct=AuthCernWrapper.get_user_info,
                                params={"user_name": username, "password": password})
        ret = {}
        try:
            # auth == 3 with a responsible CCID, or a non-CERN email,
            # marks the account as external.
            if int(prefs['auth']) == 3 \
                    and (int(prefs['respccid']) > 0
                         or not prefs['email'].endswith('@cern.ch')):
                ret['external'] = '1'
            else:
                ret['external'] = '0'
        except KeyError:
            # Missing auth/respccid information: play it safe, mark external.
            ret['external'] = '1'
        for key, value in prefs.items():
            if key in CFG_EXTERNAL_AUTH_CERN_HIDDEN_SETTINGS:
                ret['HIDDEN_' + key] = value
            else:
                ret[key] = value
        ## Hack to be forward-compatible with CERN SSO implementation
        if 'company' in ret:
            ret['homeinstitute'] = ret['company']
            del ret['company']
        if 'name' in ret:
            ret['fullname'] = ret['name']
            del ret['name']
        return ret
diff --git a/invenio/legacy/external_authentication/cern_wrapper.py b/invenio/legacy/external_authentication/cern_wrapper.py
index eb6c0fb57..20afab545 100644
--- a/invenio/legacy/external_authentication/cern_wrapper.py
+++ b/invenio/legacy/external_authentication/cern_wrapper.py
@@ -1,182 +1,182 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Nice API Python wrapper."""
__revision__ = \
"$Id$"
import httplib
import urllib
import re
import socket
from invenio.config import CFG_ETCDIR
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
# Read the HTTP Basic credentials ("username:password" in base64) used to
# authenticate against the NICE SOAP webservice; loaded once at import time.
_cern_nice_soap_file = open(CFG_ETCDIR + "/webaccess/cern_nice_soap_credentials.txt", "r")
_cern_nice_soap_auth = _cern_nice_soap_file.read().strip()
_cern_nice_soap_file.close()

# Regular expressions used to scrape values out of the SOAP XML responses,
# one per webservice call.
_re_ccd_is_nice = re.compile('<string.*>(?P<CCID>.*)</string>')
_re_get_groups_for_user = re.compile("<string>(?P<group>.*)</string>")
_re_user_is_member_of_list = re.compile('<boolean.*>(?P<membership>.*)</boolean>')
_re_user_is_member_of_group = re.compile("<boolean.*>(?P<membership>.*)</boolean>")
_re_get_user_info = re.compile("<(?P<field>.*)>(?P<value>.*)</.*>")
_re_search_groups = re.compile("<string>(?P<group>.*)</string>")
_re_get_user_info_ex = re.compile("<(?P<field>.*)>(?P<value>.*)</.*>")
_re_list_users = re.compile("<(?P<field>.*)>(?P<value>.*)</.*>")
class AuthCernWrapper:
    """Wrapper class for CERN NICE/CRA webservice"""

    def __init__(self):
        """Create a connection to CERN NICE/CRA webservice.
        Authentication credential should be in the file
        CFG_ETCDIR/webaccess/cern_nice_soap_credentials.txt which must contain
        username:password in base64 encoding.
        """
        ## WORKAROUND for bug in Python up to 2.4.3
        ## Having a timeout is buggy with SSL
        self._headers = {"Content-type": "application/x-www-form-urlencoded",
                         "Accept": "text/plain",
                         "Authorization": "Basic " + _cern_nice_soap_auth}
        # Lazily created per-request in _request().
        self._conn = None

    def __del__(self):
        """Close the CERN Nice webservice connection."""
        if self._conn:
            self._conn.close()

    def _request(self, name, params):
        """Call the NAME request with a dictionary PARAMS.

        Temporarily disables the global socket timeout (see the SSL
        workaround note in __init__) and restores it afterwards.
        @return: the XML response.
        """
        params = urllib.urlencode(params)
        socket_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(None)
        try:
            self._conn = httplib.HTTPSConnection("winservices-soap.web.cern.ch")
            self._conn.connect()
            self._conn.request("POST",
                               "/winservices-soap/generic/Authentication.asmx/%s" % name,
                               params, self._headers)
            response = self._conn.getresponse().read()
            self._conn.close()
        except:
            register_exception(alert_admin=True)
            raise
        finally:
            # FIX: previously the global timeout was only restored on the
            # success path, leaving it disabled after any failed request.
            socket.setdefaulttimeout(socket_timeout)
        return response

    def ccid_is_nice(self, ccid):
        """Verify this CCID belongs to a Nice account. Returns login or False
        if not found (the service answers "-1" for unknown CCIDs).
        """
        data = self._request("CCIDisNice", {"CCID": ccid})
        match = _re_ccd_is_nice.search(data)
        if match:
            login = match.group("CCID")
            # FIX: the previous code compared the Match object itself to the
            # integer -1 (always false), so "-1" was returned as a login.
            if login == "-1":
                return False
            return login

    def get_groups_for_user(self, user_name):
        """Returns a string array containing Groups the specified User is
        member of. UserName is NICE Login or Email. Listname can be 'listname'
        or 'listname@cern.ch'."""
        data = self._request("GetGroupsForUser", {"UserName": user_name})
        groups = []
        for match in _re_get_groups_for_user.finditer(data):
            groups.append(match.group("group"))
        return groups

    def user_is_member_of_list(self, user_name, list_name):
        """Check if one user is member of specified simba list. UserName is
        NICE Login or Email. Listname can be 'listname' or 'listname@cern.ch'.
        Returns True/False, or None when the response cannot be parsed.
        """
        data = self._request("UserIsMemberOfList",
                             {"UserName": user_name, "ListName": list_name})
        match = _re_user_is_member_of_list.search(data)
        if match:
            match = match.group("membership")
            return match == "true"
        return None

    def user_is_member_of_group(self, user_name, group_name):
        """Check if one user is member of specified NICE Group. UserName is
        NICE Login or Email.
        Returns True/False, or None when the response cannot be parsed.
        """
        data = self._request("UserIsMemberOfGroup",
                             {"UserName": user_name, "GroupName": group_name})
        match = _re_user_is_member_of_group.search(data)
        if match:
            match = match.group("membership")
            return match == "true"
        return None

    def get_user_info(self, user_name, password):
        """Authenticates user from login and password. Login can be email
        address or NICE login.
        @return: a dictionary of field name -> value parsed from the response.
        """
        data = self._request("GetUserInfo",
                             {"UserName": user_name, "Password": password})
        infos = {}
        for row in data.split('\r\n'):
            match = _re_get_user_info.search(row)
            if match:
                infos[match.group("field")] = match.group("value")
        return infos

    def search_groups(self, pattern):
        """Search for a group, based on given pattern. 3 characters minimum are
        required. Search is done with: *pattern*."""
        data = self._request("SearchGroups", {"pattern": pattern})
        groups = []
        for match in _re_search_groups.finditer(data):
            groups.append(match.group("group"))
        return groups

    def get_user_info_ex(self, user_name, password, group_name):
        """Authenticates user from login and password. Login can be email
        address or NICE login. Group membership is verified at the same time,
        multiple groups can be specified, separated with ','."""
        data = self._request("GetUserInfoEx", {"UserName": user_name,
                                               "Password": password,
                                               "GroupName": group_name})
        infos = {}
        for row in data.split('\r\n'):
            match = _re_get_user_info_ex.search(row)
            if match:
                infos[match.group("field")] = match.group("value")
        return infos

    def list_users(self, display_name):
        """Search users with given display name. Display name is firstname +
        lastname, or email, and can contain *.
        @return: a list of dictionaries, one per <userInfo> element.
        """
        data = self._request("ListUsers", {"DisplayName": display_name})
        users = []
        # FIX: initialise so a malformed response (field rows before the
        # first <userInfo>) cannot raise NameError.
        current_user = {}
        for row in data.split('\r\n'):
            if "<userInfo>" in row:
                current_user = {}
            elif "</userInfo>" in row:
                users.append(current_user)
            else:
                match = _re_list_users.search(row)
                if match:
                    current_user[match.group("field")] = match.group("value")
        return users
diff --git a/invenio/legacy/miscutil/doc/hacking/miscutil-errorlib.webdoc b/invenio/legacy/miscutil/doc/hacking/miscutil-errorlib.webdoc
index 90a87f067..f6df1ef21 100644
--- a/invenio/legacy/miscutil/doc/hacking/miscutil-errorlib.webdoc
+++ b/invenio/legacy/miscutil/doc/hacking/miscutil-errorlib.webdoc
@@ -1,318 +1,318 @@
## -*- mode: html; coding: utf-8; -*-
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
<!-- WebDoc-Page-Title: Error Library -->
<!-- WebDoc-Page-Navtrail: <a class="navtrail" href="<CFG_SITE_URL>/help/hacking">Hacking Invenio</a> &gt; <a class="navtrail" href="miscutil-internals">MiscUtil Internals</a> -->
<!-- WebDoc-Page-Revision: $Id$ -->
<p>
These are the functions and methodologies for error handling in Invenio.
</p>
<h2>Contents</h2>
<ol>
<li><a href="#overview">Overview</a></li>
<li><a href="#creating">Creating errors</a></li>
<li><a href="#using">Using error library</a></li>
<li><a href="#troubleshooting">Troubleshooting</a></li>
</ol>
<h2>1. <a name="overview">Overview</a></h2>
<p>This API handles two concepts: Errors and Warnings.
<p>An error is an unexpected behavior that leads to the stopping of the current process.
In the context of web pages, an error page should be displayed instead of the
requested page. Errors are logged into <code>invenio/var/log/invenio.err</code>.
Errors can be logged with client information and a tracestack.</p>
<p>A warning is an unexpected behavior that can be ignored. Warnings are logged into
<code>invenio/var/log/invenio.log</code> with just the warning message.</p>
<p>Errors and warnings should be internationalized (see <a href="#i18n">below</a>).</p>
<h2>2. <a name="creating">Creating errors</a></h2>
<h3>2.1 Configuration file</h3>
<p>Every module should create a file containing error definitions, warning
definitions, variables avoiding &quot;magic&quot; number or strings, etc.</p>
<p>This file has to be named against a convention:</p>
<pre>
&lt;module-name&gt;&#95;config.py
</pre>
<p>e.g. <code>webmessage&#95;config.py</code> for the WebMessage module.</p>
<h3>2.2 Dictionaries of errors</h3>
<p>Errors and warnings are eventually stored into dictionaries. The dictionaries
are to be named against the following convention:</p>
<pre>
CFG&#95;&lt;MODULENAME&gt;&#95;ERROR&#95;MESSAGES and
CFG&#95;&lt;MODULENAME&gt;&#95;WARNING&#95;MESSAGES
</pre>
<p>These two dictionaries (one can choose to implement only one if he doesn&apos;t
need warnings, for example) contain an error-name -&gt; displayable message
association.</p>
<p>Errors are to be named against the following convention:</p>
<pre>
ERR&#95;&lt;MODULE-NAME&gt;&#95;ERROR&#95;NAME
</pre>
<p>Please note the use of uppercase.</p>
<p>Warnings can also be named as errors if convenient, and so have to
follow one of these rules:</p>
<pre>
WRN&#95;&lt;MODULE-NAME&gt;&#95;WARNING&#95;NAME or
ERR&#95;&lt;MODULE-NAME&gt;&#95;WARNING&#95;NAME
</pre>
<p>The associated message can obviously contain substituted variables like <code>%s</code>, <code>%d</code>...
<h3><a name="i18n">Internationalization</a></h3>
<p>Errors should also be internationalized. As the config file cannot receive
parameters, this is done by the error handling library. The convenient way that has
been chosen is to nest the classical <code>&#95;()</code> function inside the string.</p>
<p>An internationalized error message should look like this:</p>
<pre>
'&#95;("Internationalized error (%s) message")'
</pre>
<p>A complete example of correct dictionary is given below:</p>
<pre>
CFG&#95;WEBCOMMENT&#95;ERROR&#95;MESSAGES =
{ 'ERR&#95;WEBCOMMENT&#95;RECID&#95;INVALID' : '&#95;("%i is an invalid record ID")',
'ERR&#95;WEBCOMMENT&#95;RECID&#95;NAN' : '&#95;("Record ID %i is not a number")',
'ERR&#95;WEBCOMMENT&#95;UID&#95;INVALID' : '&#95;("%i is an invalid user ID")'
}
</pre>
<h2>3. <a name="using">Using error library</a></h2>
<h3>3.1 From a web interface</h3>
<p>When displaying a page, the <code>modules/webstyle/lib/webpage.py</code> python module should
be used. This module provides a <code>page()</code> function, convenient for webpage output,
which can handle errors (display and log).<br />
A call to this function should use the following arguments, assuming that language
information is stored in a variable called <code>ln</code>, and request information
are stored in req (will be used for IP logging, for example):</p>
<pre>
page(...,
req=req,
language=ln,
errors=error&#95;list,
warnings=warnings&#95;list,
...)
</pre>
<p>list of errors and warnings are behaving the same way. They are lists of tuples:</p>
<pre>
[(error&#95;name, param1, ..., paramN), ...]
</pre>
<p>The params are used to represent substituted values in messages. For example if
you want to throw one of the errors above, error&#95;list should look like this:</p>
<pre>
error&#95;list = [('ERR&#95;WEBCOMMENT&#95;RECID&#95;INVALID', 123456)]
</pre>
<h4>Example</h4>
<p>Business logic should be separated from web interface. We consider three files in the
following (real) example:
<ol>
<li><code>webmessage_webinterface.py</code>, which is the page as viewed by a browser,</li>
<li><code>webmessage.py</code>, which contains the business logic,</li>
<li><code>webmessage&#95;config</code>, which contains error definitions</li>
</ol>
<p>In this example, a user tries to read a message. We must ensure he doesn't
read another message, and that this message really exists in the system. For
more convenient reading, some (non error-related) parts of the code have been suppressed.</p>
<h5>webmessage&#95;config.py</h5>
<pre>
&#35; error messages. (should not happen, except in case of reload, or url altering)
CFG&#95;WEBMESSAGE&#95;ERROR&#95;MESSAGES = \
{ 'ERR&#95;WEBMESSAGE&#95;NOTOWNER': '&#95;("This message is not in your mailbox")',
'ERR&#95;WEBMESSAGE&#95;NONICKNAME':'&#95;("No nickname or user for uid #%s")',
'ERR&#95;WEBMESSAGE&#95;NOMESSAGE': '&#95;("This message doesn\'t exist")'
}
</pre>
<h5>webmessage.py: business logic</h5>
<pre>
from invenio.webmessage&#95;config import CFG&#95;WEBMESSAGE&#95;ERROR&#95;MESSAGES
def perform&#95;request&#95;display&#95;msg(uid, msgid, ln=CFG_SITE_LANG):
uid = wash&#95;url&#95;argument(uid, 'int')
msgid = wash&#95;url&#95;argument(msgid, 'int')
ln = wash&#95;language(ln)
errors = []
warnings = []
body = ""
if (check&#95;user&#95;owns&#95;message(uid, msgid) == 0):
&#35; The user doesn't own this message
errors.append(('ERR&#95;WEBMESSAGE&#95;NOTOWNER',))
else:
(msg&#95;id, ...) = get&#95;message(uid, msgid)
if (msg&#95;id == ""):
&#35; The message exists in table user&#95;msgMESSAGE
&#35; but not in table msgMESSAGE => table inconsistency
errors.append(('ERR&#95;WEBMESSAGE&#95;NOMESSAGE',))
else:
body = webmessage&#95;templates.tmpl&#95;display&#95;msg( ... )
return (body, errors, warnings)
</pre>
<h5>webmessage_webinterface.py: web interface</h5>
<pre>
from invenio.webpage import page
from invenio.webmessage import perform&#95;request&#95;display&#95;msg
def display&#95;msg(req, msgid=-1, ln=CFG_SITE_LANG):
&#95; = gettext&#95;set&#95;language(ln)
# Generate content
(body, errors, warnings) = perform&#95;request&#95;display&#95;msg(uid, msgid, ln)
title = &#95;("Read a message")
return page(title = title,
body = body,
navtrail = get&#95;navtrail(ln, title),
uid = uid,
lastupdated = &#95;&#95;lastupdated&#95;&#95;,
req = req,
language = ln,
errors = errors,
warnings = warnings)
</pre>
<h3>3.2 From a command line interface</h3>
<p>The following functions can be useful (see source code for other functions):</p>
<pre>
get&#95;msgs&#95;for&#95;code&#95;list(code&#95;list, stream='error', ln=CFG_SITE_LANG)
Returns formatted strings for the given errors
@param code&#95;list: list of tuples [(err&#95;name, arg1, ..., argN), ...]
@param stream: 'error' or 'warning'
@return list of tuples of length 2 [('ERR&#95;...', err&#95;msg), ...]
if code&#95;list empty, will return None.
if errors retrieving error messages, will append an error to
the list
register&#95;errors(errors&#95;or&#95;warnings&#95;list, stream, req=None)
log errors to invenio.err and warnings to invenio.log
errors will be logged with client information (if req is given)
and a tracestack
warnings will be logged with just the warning message
@param errors&#95;or&#95;warnings&#95;list: list of tuples (err&#95;name, err&#95;msg)
@param stream: 'error' or 'warning'
@param req = mod&#95;python request
@return integer 1 if successfully wrote to stream, integer 0 if not
will append another error to errors&#95;list if unsuccessful
send&#95;error&#95;report&#95;to&#95;admin(header, url, time, browser, client,
error, sys&#95;error, traceback)
Sends an email to the admin with client info and tracestack
</pre>
<h4>Example</h4>
<p>In the following example, two files are used:</p>
<ol>
<li><code>webmessage&#95;config</code>, containing error messages</li>
<li><code>webmessage&#95;example&#95;bin.py</code>, containing business logic</li>
</ol>
<p>Scenario: a function receives an error and wants to register it only if it is not a
messaging error</p>
<h5>webmessage&#95;config.py</h5>
<pre>
&#35; error messages. (should not happen, except in case of reload, or url altering)
CFG&#95;WEBMESSAGE&#95;ERROR&#95;MESSAGES = \
{ 'ERR&#95;WEBMESSAGE&#95;NOTOWNER': '&#95;("This message is not in your mailbox")',
'ERR&#95;WEBMESSAGE&#95;NONICKNAME':'&#95;("No nickname or user for uid #%s")',
'ERR&#95;WEBMESSAGE&#95;NOMESSAGE': '&#95;("This message doesn\'t exist")'
}
</pre>
<h5>webmessage&#95;example&#95;bin.py</h5>
<pre>
from invenio.webmessage&#95;config import CFG&#95;WEBMESSAGE&#95;ERROR&#95;MESSAGES
-from invenio.errorlib import get&#95;msgs&#95;for&#95;code&#95;list, register&#95;errors
+from invenio.ext.logging import get&#95;msgs&#95;for&#95;code&#95;list, register&#95;errors
def handle&#95;error(error):
errorlist = get&#95;msgs&#95;for&#95;code&#95;list([error])
&#35; error is a tuple of error name, arguments => we only need the name
if CFG&#95;WEBMESSAGE&#95;ERROR&#95;MESSAGES[error[0]]:
print("Error in webmessage: %s" % errorlist[0][1])
else:
for error in errorlist:
print("Error: %s" % error[1])
register&#95;errors(errorlist, 'error')
</pre>
<h2>4. <a name="troubleshooting">Troubleshooting</a></h2>
<p>MiscUtil can generate errors. See miscutil&#95;config.py for a complete list.
One can see below some usual errors and their solutions:</p>
<dl>
<dt><b><code>ERR&#95;MISCUTIL&#95;IMPORT&#95;ERROR</code></b></dt>
<dd>The <code>&lt;module-name&gt;&#95;config.py</code> file has not been found. Check it
has the correct name and is deployed.<br />
Check that the error is named following this pattern:
<pre>
WRN&#95;&lt;MODULE-NAME&gt;&#95;WARNING&#95;NAME or
ERR&#95;&lt;MODULE-NAME&gt;&#95;WARNING&#95;NAME
</pre>
</dd>
<dt><b><code>ERR&#95;MISCUTIL&#95;NO&#95;DICT</code></b></dt>
<dd>No dictionary could be found in <code>&lt;module-name&gt;&#95;config.py</code>. Check
that your dictionary is correctly named:
<pre>
CFG&#95;&LT;MODULENAME&GT;&#95;ERROR&#95;MESSAGES
</pre>
You could also have inverted errors and warnings if only one dictionary was provided.<br/>
This can also happen when using direct API if the <code>stream</code> argument is misspelled.
</dd>
<dt><b><code>ERR&#95;MISCUTIL&#95;NO&#95;MESSAGE&#95;IN&#95;DICT</code></b></dt>
<dd>A dictionary was found but not the error in it. You probably misspelled
<code>error&#95;name</code>, or inverted errors and warnings dictionaries.
</dd>
<dt><b><code>ERR&#95;MISCUTIL&#95;UNDEFINED&#95;ERROR</code></b></dt>
<dd>The library couldn't guess the name of module. Check that the error name is beginning
with <code>ERR&#95;MODULE-NAME&#95;</code> or <code>WRN&#95;MODULE-NAME&#95;</code>. This library uses
underscores as separators to guess module name.
</dd>
<dt><b><code>ERR&#95;MISCUTIL&#95;TOO&#95;MANY&#95;ARGUMENT</code></b></dt>
<dd>As the library was rendering the error for display, a supernumerary text substitute was
found (extra substitutes are ignored in the final rendering, and this error is appended to the list of errors):
<pre>
# Module knights:
'ERR&#95;KNIGHTS': '&#95;("We are the knights who say %s!")'
errors = ('ERR&#95;KNIGHTS', 'ni', 'ni')
</pre>
</dd>
<dt><b><code>ERR&#95;MISCUTIL&#95;TOO&#95;FEW&#95;ARGUMENT</code></b></dt>
<dd>Not enough arguments (text substitutes) were given for an error. Missing ones are
replaced by <code>'???'</code>:
<pre>
# Module knights
'ERR&#95;KNIGHTS': '&#95;("We are the knights who say %s! We demand a %s")'
errors = ('ERR&#95;KNIGHTS', 'ni') # so, where is the shrubbery??
</pre>
</dd>
<dt><b><code>ERR&#95;MISCUTIL&#95;BAD&#95;ARGUMENT&#95;TYPE</code></b></dt>
<dd>Your arguments (text substitutes) did not match with the error declaration<br />
e.g. inversion between integer (<code>%i</code>) and string (<code>%s</code>)
</dd>
</dl>
diff --git a/invenio/legacy/miscutil/solrutils_bibindex_indexer.py b/invenio/legacy/miscutil/solrutils_bibindex_indexer.py
index c62c972b1..0f549a9e9 100644
--- a/invenio/legacy/miscutil/solrutils_bibindex_indexer.py
+++ b/invenio/legacy/miscutil/solrutils_bibindex_indexer.py
@@ -1,76 +1,76 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Solr utilities.
"""
from invenio.config import CFG_SOLR_URL
from invenio.solrutils_config import CFG_SOLR_INVALID_CHAR_RANGES
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
if CFG_SOLR_URL:
import solr
SOLR_CONNECTION = solr.SolrConnection(CFG_SOLR_URL) # pylint: disable=E1101
def replace_invalid_solr_characters(utext):
    """Return UTEXT with every character that falls inside one of the
    configured invalid ranges replaced by that range's substitute."""
    def _map_char(ch):
        code = ord(ch)
        for low, high, substitute in CFG_SOLR_INVALID_CHAR_RANGES:
            if low <= code <= high:
                return substitute
        return ch
    return ''.join(_map_char(ch) for ch in utext)
def solr_add_fulltext(recid, text):
    """Dispatch TEXT to Solr as the fulltext of record RECID.

    Returns True on success, False otherwise (also when RECID is empty).
    """
    if recid:
        try:
            cleaned = replace_invalid_solr_characters(unicode(text, 'utf-8'))
            SOLR_CONNECTION.add(id=recid, abstract="", author="",
                                fulltext=cleaned, keyword="", title="")
            return True
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Deliberately forget about badly encoded UTF-8 files.
            pass
        except:
            # Anything unexpected is reported to the administrators.
            register_exception(alert_admin=True)
    return False
def solr_commit():
    """Commit pending changes to Solr, reporting (never raising) failures."""
    try:
        # Commits might cause an exception, most likely a
        # timeout while hitting a background merge
        # Changes will then be committed later by the
        # calling (periodical) task
        # Also, autocommits can be used in the solrconfig
        SOLR_CONNECTION.commit()
    except:
        # Best-effort: a failed commit is only reported to the admins.
        register_exception(alert_admin=True)
diff --git a/invenio/legacy/miscutil/solrutils_bibrank_indexer.py b/invenio/legacy/miscutil/solrutils_bibrank_indexer.py
index 84906506f..4d67b3b68 100644
--- a/invenio/legacy/miscutil/solrutils_bibrank_indexer.py
+++ b/invenio/legacy/miscutil/solrutils_bibrank_indexer.py
@@ -1,184 +1,184 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Solr utilities.
"""
import time
from invenio.config import CFG_SOLR_URL
from invenio.bibtask import write_message, task_get_option, task_update_progress, \
task_sleep_now_if_required
from invenio.dbquery import run_sql
from invenio.search_engine import record_exists
from invenio.bibdocfile import BibRecDocs
from invenio.solrutils_bibindex_indexer import replace_invalid_solr_characters
from invenio.bibindex_engine import create_range_list
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.bibrank_bridge_utils import get_tags, get_field_content_in_utf8
if CFG_SOLR_URL:
import solr
SOLR_CONNECTION = solr.SolrConnection(CFG_SOLR_URL) # pylint: disable=E1101
def solr_add_ranges(id_ranges):
    """Split ID_RANGES into flush-sized sub-ranges and hand each one to the
    Solr ranking indexer, newest ranges first, committing at the end."""
    flush_size = task_get_option("flush")
    sub_ranges = []
    for id_range in id_ranges:
        low, high = id_range[0], id_range[1]
        cursor = low
        while cursor <= high:
            sub_ranges.append((cursor, min(cursor + flush_size - 1, high)))
            cursor += flush_size
    tags = get_tags()
    # Index latest records first (reversed order): during long indexing runs
    # the ranker cuts the hitset using latest records, so having them indexed
    # early yields better intermediate results.
    counter = 0
    for low, high in reversed(sub_ranges):
        progress = "Solr ranking indexer called for %s-%s" % (low, high)
        write_message(progress)
        task_update_progress(progress)
        counter = solr_add_range(low, high, tags, counter)
    solr_commit_if_necessary(counter, final_commit=True)
def solr_commit_if_necessary(next_commit_counter, final_commit=False, recid=None):
    """Commit to Solr once the counter reaches the flush threshold, or on a
    final commit when work is pending; return the updated counter."""
    threshold_reached = next_commit_counter == task_get_option("flush") - 1
    if not (threshold_reached or (final_commit and next_commit_counter > 0)):
        return next_commit_counter + 1
    recid_info = ''
    if recid:
        recid_info = ' for recid=%s' % recid
    status_msg = 'Solr ranking indexer COMMITTING' + recid_info
    write_message(status_msg)
    task_update_progress(status_msg)
    try:
        # A commit may raise, most likely a timeout while Solr performs a
        # background merge; the changes are then committed later by the
        # calling (periodical) task. Autocommit can also be configured in
        # solrconfig.
        SOLR_CONNECTION.commit()
    except:
        register_exception(alert_admin=True)
    task_sleep_now_if_required(can_stop_too=True)
    return 0
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """Push the ranking-relevant field values of every existing record in
    [LOWER_RECID, UPPER_RECID] to Solr, preserving the fulltext, and return
    the updated commit counter."""
    counter = next_commit_counter
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue
        summary = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
        creator = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keywords = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
        caption = get_field_content_in_utf8(recid, 'title', tags_to_index)
        try:
            fulltext = unicode(BibRecDocs(recid).get_text(), 'utf-8')
        except:
            # No attached documents (or undecodable text): index without fulltext.
            fulltext = ''
        solr_add(recid, summary, creator, fulltext, keywords, caption)
        counter = solr_commit_if_necessary(counter, recid=recid)
    return counter
def solr_add(recid, abstract, author, fulltext, keyword, title):
    """Add the word-similarity ranking fields of record RECID to Solr,
    sanitising every value first; failures are reported, not raised."""
    clean = replace_invalid_solr_characters
    try:
        SOLR_CONNECTION.add(id=recid,
                            abstract=clean(abstract),
                            author=clean(author),
                            fulltext=clean(fulltext),
                            keyword=clean(keyword),
                            title=clean(title))
    except:
        register_exception(alert_admin=True)
def word_similarity_solr(run):
    # Thin alias: BibRank expects a rank-method entry point under this name;
    # the actual indexing work happens in word_index().
    return word_index(run)
def get_recIDs_by_date(dates=""):
    """Return a range list of recIDs modified between DATES[0] and DATES[1].

    Without DATES, records modified since the last run of the 'wrd' ranking
    method are returned; None is returned when that method is unknown.
    """
    if not dates:
        write_message("Using the last update time for the rank method")
        rows = run_sql('SELECT last_updated FROM rnkMETHOD WHERE name="wrd"')
        if not rows:
            return
        # A NULL last_updated means "never ran": fall back to the epoch-like date.
        dates = (rows[0][0] or "0000-00-00", '')
    if dates[1]:
        rows = run_sql('SELECT id FROM bibrec WHERE modification_date >= %s AND modification_date <= %s ORDER BY id ASC', (dates[0], dates[1]))
    else:
        rows = run_sql('SELECT id FROM bibrec WHERE modification_date >= %s ORDER BY id ASC', (dates[0],))
    return create_range_list([row[0] for row in rows])
def word_index(run): # pylint: disable=W0613
    """Run the Solr word-similarity indexing task."""
    explicit_ids = task_get_option("id")
    if len(explicit_ids):
        # IDs given explicitly on the command line take precedence.
        solr_add_ranges([(pair[0], pair[1]) for pair in explicit_ids])
    else:
        # Otherwise index whatever was modified since the last run.
        started_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        modified_ranges = get_recIDs_by_date()
        if modified_ranges:
            solr_add_ranges([(rng[0], rng[1]) for rng in modified_ranges])
            run_sql('UPDATE rnkMETHOD SET last_updated=%s WHERE name="wrd"', (started_at, ))
        else:
            write_message("No new records. Solr index is up to date")
    write_message("Solr ranking indexer completed")
diff --git a/invenio/legacy/miscutil/solrutils_bibrank_searcher.py b/invenio/legacy/miscutil/solrutils_bibrank_searcher.py
index e5f34d8b3..8ade2bd5b 100644
--- a/invenio/legacy/miscutil/solrutils_bibrank_searcher.py
+++ b/invenio/legacy/miscutil/solrutils_bibrank_searcher.py
@@ -1,218 +1,218 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Solr utilities.
"""
import itertools
from invenio.config import CFG_SOLR_URL
from invenio.intbitset import intbitset
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
if CFG_SOLR_URL:
import solr
conn = solr.Solr(CFG_SOLR_URL)
SOLR_CONNECTION = solr.SolrConnection(CFG_SOLR_URL) # pylint: disable=E1101
SOLR_MLT_CONNECTION = solr.SearchHandler(conn, "/mlt")
BOOLEAN_EQUIVALENTS = {"+": "AND",
"|": "OR",
"-": "NOT"}
def get_collection_filter(hitset, cutoff_amount):
    """Build a Solr filter query keeping only the last CUTOFF_AMOUNT records
    of HITSET (considered the newest, hence most relevant), or '' when the
    selection is empty."""
    start = max(len(hitset) - cutoff_amount, 0)
    tail = itertools.islice(hitset, start, None)
    joined = ' '.join(str(recid) for recid in tail)
    return 'id:(%s)' % joined if joined else ''
def solr_get_ranked(query, hitset, params, ranked_result_amount):
    """
    Queries Solr.
    Returns: a list of ranked record ids [(recid, score), ...) contained in hitset
    and an intbitset of record ids contained in hitset.
    """
    # fq restricts the query to the newest part of the hitset; timeAllowed
    # caps the query time (both configured via the rank-method params).
    response = SOLR_CONNECTION.query(q=query, fields=['id', 'score'], rows=str(ranked_result_amount), fq=get_collection_filter(hitset, params['cutoff_amount']), timeAllowed=params['cutoff_time_ms'])
    return get_normalized_ranking_scores(response)
def solr_get_similar_ranked(recid, hitset, params, ranked_result_amount):
    """
    Queries Solr for similar records.
    Returns: a list of ranked record ids [(recid, score), ...) contained in hitset
    and an intbitset of record ids contained in hitset.
    """
    # original one first
    query = 'id:%s' % recid
    # MoreLikeThis handler call; all mlt_* tuning knobs come from the
    # 'find_similar_to_recid' section of the rank-method parameters.
    response = SOLR_MLT_CONNECTION(q=query, fields=['id', 'score'], rows=str(ranked_result_amount * params['find_similar_to_recid']['more_results_factor']),
                                   mlt='true', mlt_fl=params['find_similar_to_recid']['mlt_fl'], timeAllowed=params['cutoff_time_ms'],
                                   mlt_mintf=params['find_similar_to_recid']['mlt_mintf'], mlt_mindf=params['find_similar_to_recid']['mlt_mindf'], mlt_minwl=params['find_similar_to_recid']['mlt_minwl'],
                                   mlt_maxwl=params['find_similar_to_recid']['mlt_maxwl'], mlt_maxqt=params['find_similar_to_recid']['mlt_maxqt'], mlt_maxntp=params['find_similar_to_recid']['mlt_maxntp'],
                                   mlt_boost=params['find_similar_to_recid']['mlt_boost'])
    # Insert original id at the front with guaranteed highest score
    response.results.insert(0, {u'id': u'%s' % recid, u'score': response.maxScore * 1.1})
    return get_normalized_ranking_scores(response, hitset, [recid])
def get_normalized_ranking_scores(response, hitset_filter=None, recids=None):
    """
    Returns the result having normalized ranking scores, interval [0, 100].

    hitset_filter - optional filter for the results (an explicit empty
                    filter still filters everything out)
    recids - optional recids that shall remain in the result despite the filter

    Returns a pair (ranked_result, matched_recs): the ranked ids as
    (recid, score) tuples in ascending score order, and an intbitset of
    the matched record ids.
    """
    # Fix for the original mutable default argument (recids=[]); None keeps
    # the call-site behavior identical.
    if recids is None:
        recids = []
    if not len(response.results):
        return ([], intbitset())
    # response.maxScore does not work in case something was added to the
    # response, so normalise against the first (highest) returned score.
    max_score = float(response.results[0]['score'])
    ranked_result = []
    matched_recs = intbitset()
    for hit in response.results:
        recid = int(hit['id'])
        if (not hitset_filter and hitset_filter != []) or recid in hitset_filter or recid in recids:
            normalised_score = 0
            if max_score > 0:
                normalised_score = int(100.0 / max_score * float(hit['score']))
            ranked_result.append((recid, normalised_score))
            matched_recs.add(recid)
    # Ascending order: lowest scores first, best matches last.
    ranked_result.reverse()
    return (ranked_result, matched_recs)
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field, ranked_result_amount):
    """
    Rank the records in HITSET against the word query PATTERN via Solr and
    return a sorted list.

    input:
    hitset - a list of hits for the query found by search_engine
    verbose - verbose value
    explicit_field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23,34], [344,24], [1,01]]
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    search_units = []
    if not len(hitset):
        return ([], "", "", voutput)
    if pattern:
        pattern = " ".join(map(str, pattern))
        from invenio.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, explicit_field)
    else:
        return (None, "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible.", "", voutput)
    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units
    query = ""
    (ranked_result, matched_recs) = (None, None)
    # Ranks similar records
    if search_units[0][2] == 'recid':
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount
        try:
            (ranked_result, matched_recs) = solr_get_similar_ranked(recid, hitset, params, ranked_result_amount)
        except:
            register_exception()
            return (None, "Records not ranked. An error occurred. Please check the query.", "", voutput)
        # Cutoffs potentially large hitset
        it = itertools.islice(hitset, params['find_similar_to_recid']['hitset_cutoff'])
        hitset = intbitset(list(it))
    # Regular word similarity ranking
    else:
        for (operator, pattern, field, unit_type) in search_units:
            # Any field
            if field == '':
                field = 'global'
            # Field might not exist
            elif field not in params["fields"].keys():
                field = params["default_field"]
            if unit_type == "a":
                # Eliminates leading and trailing %
                if pattern[0] == "%":
                    pattern = pattern[1:-1]
                pattern = "\"" + pattern + "\""
            weighting = "^" + str(params["fields"][field]["weight"])
            # NOTE(review): only the part after the last ':' is kept here —
            # presumably to strip a field prefix typed by the user; confirm.
            if ':' in pattern:
                pattern = pattern.rsplit(':', 1)[1]
            query_part = field + ":" + pattern + weighting
            # Considers boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "
            query += query_part + " "
        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query
        try:
            (ranked_result, matched_recs) = solr_get_ranked(query, hitset, params, ranked_result_amount)
        except:
            register_exception()
            return (None, "Records not ranked. An error occurred. Please check the query.", "", voutput)
    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs
    # Considers not ranked records
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        # Unranked records are prepended with score 0 so they sort lowest.
        lrecIDs = list(not_ranked)
        ranked_result = zip(lrecIDs, [0] * len(lrecIDs)) + ranked_result
    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked
    return (ranked_result, params["prefix"], params["postfix"], voutput)
diff --git a/invenio/legacy/oaiharvest/daemon.py b/invenio/legacy/oaiharvest/daemon.py
index 2c0a16528..9d32f2ac3 100644
--- a/invenio/legacy/oaiharvest/daemon.py
+++ b/invenio/legacy/oaiharvest/daemon.py
@@ -1,1622 +1,1622 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
OAI Harvest daemon - harvest records from OAI repositories.
If started via CLI with --verb parameters, starts a manual single-shot
harvesting. Otherwise starts a BibSched task for periodical harvesting
of repositories defined in the OAI Harvest admin interface
"""
__revision__ = "$Id$"
import os
import sys
import getopt
import getpass
import re
import time
import calendar
import shutil
import tempfile
import urlparse
import random
from invenio.config import \
CFG_BINDIR, \
CFG_TMPDIR, \
CFG_ETCDIR, \
CFG_INSPIRE_SITE, \
CFG_CERN_SITE, \
CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT, \
CFG_SITE_URL, \
CFG_OAI_FAILED_HARVESTING_STOP_QUEUE, \
CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN
from invenio.oai_harvest_config import InvenioOAIHarvestWarning
from invenio.dbquery import run_sql
from invenio.bibtask import \
task_get_task_param, \
task_get_option, \
task_set_option, \
write_message, \
task_init, \
task_sleep_now_if_required, \
task_update_progress, \
task_low_level_submission
from invenio.legacy.bibrecord import record_extract_oai_id, create_records, \
create_record, record_add_fields, \
record_delete_fields, record_xml_output, \
record_get_field_instances, \
record_modify_subfield, \
record_has_field, field_xml_output
from invenio import oai_harvest_getter
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.plotextractor_getter import harvest_single, make_single_directory
from invenio.plotextractor_converter import untar
from invenio.plotextractor import process_single, get_defaults
from invenio.shellutils import run_shell_command, Timeout
from invenio.utils.text import translate_latex2unicode
from invenio.bibedit_utils import record_find_matching_fields
from invenio.bibcatalog import bibcatalog_system
import invenio.template
oaiharvest_templates = invenio.template.load('oai_harvest')
from invenio.webinterface_handler_flask import with_app_context
## precompile some often-used regexp for speed reasons:
REGEXP_OAI_ID = re.compile("<identifier.*?>(.*?)<\/identifier>", re.DOTALL)
REGEXP_RECORD = re.compile("<record.*?>(.*?)</record>", re.DOTALL)
REGEXP_REFS = re.compile("<record.*?>.*?<controlfield .*?>.*?</controlfield>(.*?)</record>", re.DOTALL)
REGEXP_AUTHLIST = re.compile("<collaborationauthorlist.*?</collaborationauthorlist>", re.DOTALL)
CFG_OAI_AUTHORLIST_POSTMODE_STYLESHEET = "%s/bibconvert/config/%s" % (CFG_ETCDIR, "authorlist2marcxml.xsl")
def get_nb_records_in_file(filename):
    """
    Return number of records in FILENAME, which is either a harvested or a
    converted file. Useful for statistics.

    Returns 0 when the file does not exist (or cannot be opened) and -1 on
    any other error.
    """
    try:
        # Use a context manager so the file handle is always closed
        # (the original leaked the handle via open(...).read()).
        with open(filename, 'r') as source:
            return source.read().count("</record>")
    except IOError:
        return 0   # file does not exist and such
    except Exception:
        return -1
def task_run_core():
    """Run the harvesting task. The row argument is the oaiharvest task
    queue row, containing if, arguments, etc.
    Return 1 in case of success and 0 in case of failure.
    """
    # NOTE(review): despite the docstring, this function actually returns
    # True/False; False makes BibSched stop the queue.
    reposlist = []
    datelist = []
    dateflag = 0
    # Prefix for every temporary harvest file produced by this task run.
    filepath_prefix = "%s/oaiharvest_%s" % (CFG_TMPDIR, str(task_get_task_param("task_id")))
    ### go ahead: build up the reposlist
    if task_get_option("repository") is not None:
        ### user requests harvesting from selected repositories
        write_message("harvesting from selected repositories")
        for reposname in task_get_option("repository"):
            row = get_row_from_reposname(reposname)
            if row == []:
                write_message("source name %s is not valid" % (reposname,))
                continue
            else:
                reposlist.append(get_row_from_reposname(reposname))
    else:
        ### user requests harvesting from all repositories
        write_message("harvesting from all repositories in the database")
        reposlist = get_all_rows_from_db()
    ### go ahead: check if user requested from-until harvesting
    if task_get_option("dates"):
        ### for each repos simply perform a from-until date harvesting...
        ### no need to update anything
        dateflag = 1
        for element in task_get_option("dates"):
            datelist.append(element)
    error_happened_p = 0 # 0: no error, 1: "recoverable" error (don't stop queue), 2: error (admin intervention needed)
    j = 0
    for repos in reposlist:
        j += 1
        task_sleep_now_if_required()
        # Extract values from database row (in exact order):
        # | id | baseurl | metadataprefix | arguments | comment
        # | bibconvertcfgfile | name | lastrun | frequency
        # | postprocess | setspecs | bibfilterprogram
        source_id = repos[0][0]
        baseurl = str(repos[0][1])
        metadataprefix = str(repos[0][2])
        bibconvert_cfgfile = str(repos[0][5])
        reponame = str(repos[0][6])
        lastrun = repos[0][7]
        frequency = repos[0][8]
        postmode = repos[0][9]
        setspecs = str(repos[0][10])
        bibfilterprogram = str(repos[0][11])
        write_message("running in postmode %s" % (postmode,))
        downloaded_material_dict = {}
        harvested_files_list = []
        # Harvest phase
        harvestpath = "%s_%d_%s_" % (filepath_prefix, j, time.strftime("%Y%m%d%H%M%S"))
        if dateflag == 1:
            # Explicit from-until harvesting requested on the command line.
            task_update_progress("Harvesting %s from %s to %s (%i/%i)" % \
                                 (reponame, \
                                  str(datelist[0]),
                                  str(datelist[1]),
                                  j, \
                                  len(reposlist)))
            exit_code, file_list = oai_harvest_get(prefix=metadataprefix,
                                                   baseurl=baseurl,
                                                   harvestpath=harvestpath,
                                                   fro=str(datelist[0]),
                                                   until=str(datelist[1]),
                                                   setspecs=setspecs)
            if exit_code == 1 :
                write_message("source %s was harvested from %s to %s" % \
                              (reponame, str(datelist[0]), str(datelist[1])))
                harvested_files_list = file_list
            else:
                write_message("an error occurred while harvesting from source %s for the dates chosen:\n%s\n" % \
                              (reponame, file_list))
                if error_happened_p < 1:
                    error_happened_p = 1
                continue
        elif dateflag != 1 and lastrun is None and frequency != 0:
            # First-time harvest: take the whole repository.
            write_message("source %s was never harvested before - harvesting whole repository" % \
                          (reponame,))
            task_update_progress("Harvesting %s (%i/%i)" % \
                                 (reponame,
                                  j, \
                                  len(reposlist)))
            exit_code, file_list = oai_harvest_get(prefix=metadataprefix,
                                                   baseurl=baseurl,
                                                   harvestpath=harvestpath,
                                                   setspecs=setspecs)
            if exit_code == 1 :
                update_lastrun(source_id)
                harvested_files_list = file_list
            else :
                write_message("an error occurred while harvesting from source %s:\n%s\n" % \
                              (reponame, file_list))
                if error_happened_p < 1:
                    error_happened_p = 1
                continue
        elif dateflag != 1 and frequency != 0:
            ### check that update is actually needed,
            ### i.e. lastrun+frequency>today
            timenow = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            lastrundate = re.sub(r'\.[0-9]+$', '',
                str(lastrun)) # remove trailing .00
            timeinsec = int(frequency) * 60 * 60
            updatedue = add_timestamp_and_timelag(lastrundate, timeinsec)
            proceed = compare_timestamps_with_tolerance(updatedue, timenow)
            if proceed == 0 or proceed == -1 : #update needed!
                write_message("source %s is going to be updated" % (reponame,))
                fromdate = str(lastrun)
                fromdate = fromdate.split()[0] # get rid of time of the day for the moment
                task_update_progress("Harvesting %s (%i/%i)" % \
                                     (reponame,
                                      j, \
                                      len(reposlist)))
                exit_code, file_list = oai_harvest_get(prefix=metadataprefix,
                                                       baseurl=baseurl,
                                                       harvestpath=harvestpath,
                                                       fro=fromdate,
                                                       setspecs=setspecs)
                if exit_code == 1 :
                    update_lastrun(source_id)
                    harvested_files_list = file_list
                else :
                    write_message("an error occurred while harvesting from source %s:\n%s\n" % \
                                  (reponame, file_list))
                    if error_happened_p < 1:
                        error_happened_p = 1
                    continue
            else:
                write_message("source %s does not need updating" % (reponame,))
                continue
        elif dateflag != 1 and frequency == 0:
            write_message("source %s has frequency set to 'Never' so it will not be updated" % \
                          (reponame,))
            continue
        # Harvesting done, now convert/extract/filter/upload as requested
        if len(harvested_files_list) < 1:
            write_message("No records harvested for %s" % (reponame,))
            continue
        # Retrieve all OAI IDs and set active list
        harvested_identifier_list = collect_identifiers(harvested_files_list)
        active_files_list = harvested_files_list
        if len(active_files_list) != len(harvested_identifier_list):
            # Harvested files and its identifiers are 'out of sync', abort harvest
            write_message("Harvested files miss identifiers for %s" % (reponame,))
            continue
        write_message("post-harvest processes started")
        # Convert phase
        if 'c' in postmode:
            updated_files_list = []
            i = 0
            write_message("conversion step started")
            for active_file in active_files_list:
                i += 1
                task_sleep_now_if_required()
                task_update_progress("Converting material harvested from %s (%i/%i)" % \
                                     (reponame, \
                                      i, \
                                      len(active_files_list)))
                updated_file = "%s.converted" % (active_file.split('.')[0],)
                updated_files_list.append(updated_file)
                (exitcode, err_msg) = call_bibconvert(config=bibconvert_cfgfile,
                                                      harvestpath=active_file,
                                                      convertpath=updated_file)
                if exitcode == 0:
                    write_message("harvested file %s was successfully converted" % \
                                  (active_file,))
                else:
                    write_message("an error occurred while converting %s:\n%s" % \
                                  (active_file, err_msg))
                    error_happened_p = 2
                    continue
            # print stats:
            for updated_file in updated_files_list:
                write_message("File %s contains %i records." % \
                              (updated_file,
                               get_nb_records_in_file(updated_file)))
            active_files_list = updated_files_list
            write_message("conversion step ended")
        # plotextract phase
        if 'p' in postmode:
            write_message("plotextraction step started")
            # Download tarball for each harvested/converted record, then run plotextrator.
            # Update converted xml files with generated xml or add it for upload
            updated_files_list = []
            i = 0
            for active_file in active_files_list:
                identifiers = harvested_identifier_list[i]
                i += 1
                task_sleep_now_if_required()
                task_update_progress("Extracting plots from harvested material from %s (%i/%i)" % \
                                     (reponame, i, len(active_files_list)))
                updated_file = "%s.plotextracted" % (active_file.split('.')[0],)
                updated_files_list.append(updated_file)
                (exitcode, err_msg) = call_plotextractor(active_file,
                                                         updated_file,
                                                         identifiers,
                                                         downloaded_material_dict,
                                                         source_id)
                if exitcode == 0:
                    if err_msg != "":
                        write_message("plots from %s was extracted, but with some errors:\n%s" % \
                                      (active_file, err_msg))
                    else:
                        write_message("plots from %s was successfully extracted" % \
                                      (active_file,))
                else:
                    write_message("an error occurred while extracting plots from %s:\n%s" % \
                                  (active_file, err_msg))
                    error_happened_p = 2
                    continue
            # print stats:
            for updated_file in updated_files_list:
                write_message("File %s contains %i records." % \
                              (updated_file,
                               get_nb_records_in_file(updated_file)))
            active_files_list = updated_files_list
            write_message("plotextraction step ended")
        # refextract phase
        if 'r' in postmode:
            updated_files_list = []
            i = 0
            write_message("refextraction step started")
            for active_file in active_files_list:
                identifiers = harvested_identifier_list[i]
                i += 1
                task_sleep_now_if_required()
                task_update_progress("Extracting references from material harvested from %s (%i/%i)" % \
                                     (reponame, i, len(active_files_list)))
                updated_file = "%s.refextracted" % (active_file.split('.')[0],)
                updated_files_list.append(updated_file)
                (exitcode, err_msg) = call_refextract(active_file,
                                                      updated_file,
                                                      identifiers,
                                                      downloaded_material_dict,
                                                      source_id)
                if exitcode == 0:
                    if err_msg != "":
                        write_message("references from %s was extracted, but with some errors:\n%s" % \
                                      (active_file, err_msg))
                    else:
                        write_message("references from %s was successfully extracted" % \
                                      (active_file,))
                else:
                    write_message("an error occurred while extracting references from %s:\n%s" % \
                                  (active_file, err_msg))
                    error_happened_p = 2
                    continue
            # print stats:
            for updated_file in updated_files_list:
                write_message("File %s contains %i records." % \
                              (updated_file,
                               get_nb_records_in_file(updated_file)))
            active_files_list = updated_files_list
            write_message("refextraction step ended")
        # authorlist phase
        if 'a' in postmode:
            write_message("authorlist extraction step started")
            # Initialize BibCatalog connection as default user, if possible
            if bibcatalog_system is not None:
                bibcatalog_response = bibcatalog_system.check_system()
            else:
                bibcatalog_response = "No ticket system configured"
            if bibcatalog_response != "":
                write_message("BibCatalog error: %s\n" % (bibcatalog_response,))
            updated_files_list = []
            i = 0
            for active_file in active_files_list:
                identifiers = harvested_identifier_list[i]
                i += 1
                task_sleep_now_if_required()
                task_update_progress("Extracting any authorlists from material harvested from %s (%i/%i)" % \
                                     (reponame, i, len(active_files_list)))
                updated_file = "%s.authextracted" % (active_file.split('.')[0],)
                updated_files_list.append(updated_file)
                (exitcode, err_msg) = call_authorlist_extract(active_file,
                                                              updated_file,
                                                              identifiers,
                                                              downloaded_material_dict,
                                                              source_id)
                if exitcode == 0:
                    if err_msg != "":
                        write_message("authorlists from %s was extracted, but with some errors:\n%s" % \
                                      (active_file, err_msg))
                    else:
                        write_message("any authorlists from %s was successfully extracted" % \
                                      (active_file,))
                else:
                    write_message("an error occurred while extracting authorlists from %s:\n%s" % \
                                  (active_file, err_msg))
                    error_happened_p = 2
                    continue
            # print stats:
            for updated_file in updated_files_list:
                write_message("File %s contains %i records." % \
                              (updated_file,
                               get_nb_records_in_file(updated_file)))
            active_files_list = updated_files_list
            write_message("authorlist extraction step ended")
        # fulltext phase
        if 't' in postmode:
            write_message("full-text attachment step started")
            # Attaching fulltext
            updated_files_list = []
            i = 0
            for active_file in active_files_list:
                identifiers = harvested_identifier_list[i]
                i += 1
                task_sleep_now_if_required()
                task_update_progress("Attaching fulltext to records harvested from %s (%i/%i)" % \
                                     (reponame, i, len(active_files_list)))
                updated_file = "%s.fulltext" % (active_file.split('.')[0],)
                updated_files_list.append(updated_file)
                (exitcode, err_msg) = call_fulltext(active_file,
                                                    updated_file,
                                                    identifiers,
                                                    downloaded_material_dict,
                                                    source_id)
                if exitcode == 0:
                    write_message("fulltext from %s was successfully attached" % \
                                  (active_file,))
                else:
                    write_message("an error occurred while attaching fulltext to %s:\n%s" % \
                                  (active_file, err_msg))
                    error_happened_p = 2
                    continue
            # print stats:
            for updated_file in updated_files_list:
                write_message("File %s contains %i records." % \
                              (updated_file,
                               get_nb_records_in_file(updated_file)))
            active_files_list = updated_files_list
            write_message("full-text attachment step ended")
        # Filter-phase
        if 'f' in postmode:
            write_message("filtering step started")
            # first call bibfilter:
            res = 0
            i = 0
            for active_file in active_files_list:
                i += 1
                task_sleep_now_if_required()
                task_update_progress("Filtering material harvested from %s (%i/%i)" % \
                                     (reponame, \
                                      i, \
                                      len(active_files_list)))
                (exitcode, err_msg) = call_bibfilter(bibfilterprogram, active_file)
                if exitcode == 0:
                    write_message("%s was successfully bibfiltered" % \
                                  (active_file,))
                else:
                    write_message("an error occurred while bibfiltering %s:\n%s" % \
                                  (active_file, err_msg))
                    error_happened_p = 2
                    continue
            # print stats:
            for active_file in active_files_list:
                write_message("File %s contains %i records." % \
                              (active_file + ".insert.xml",
                               get_nb_records_in_file(active_file + ".insert.xml")))
                write_message("File %s contains %i records." % \
                              (active_file + ".correct.xml",
                               get_nb_records_in_file(active_file + ".correct.xml")))
                write_message("File %s contains %i records." % \
                              (active_file + ".append.xml",
                               get_nb_records_in_file(active_file + ".append.xml")))
                write_message("File %s contains %i records." % \
                              (active_file + ".holdingpen.xml",
                               get_nb_records_in_file(active_file + ".holdingpen.xml")))
            write_message("filtering step ended")
        # Upload files
        if "u" in postmode:
            write_message("upload step started")
            if 'f' in postmode:
                # The filter step split the material per upload mode.
                upload_modes = [('.insert.xml', '-i'),
                                ('.correct.xml', '-c'),
                                ('.append.xml', '-a'),
                                ('.holdingpen.xml', '-o')]
            else:
                upload_modes = [('', '-ir')]
            i = 0
            last_upload_task_id = -1
            # Get a random sequence ID that will allow for the tasks to be
            # run in order, regardless if parallel task execution is activated
            sequence_id = random.randrange(1, 4294967296)
            for active_file in active_files_list:
                task_sleep_now_if_required()
                i += 1
                task_update_progress("Uploading records harvested from %s (%i/%i)" % \
                                     (reponame, \
                                      i, \
                                      len(active_files_list)))
                for suffix, mode in upload_modes:
                    upload_filename = active_file + suffix
                    if get_nb_records_in_file(upload_filename) == 0:
                        continue
                    last_upload_task_id = call_bibupload(upload_filename, \
                                                         [mode], \
                                                         source_id, \
                                                         sequence_id)
                    if not last_upload_task_id:
                        error_happened_p = 2
                        write_message("an error occurred while uploading %s from %s" % \
                                      (upload_filename, reponame))
                        break
                else:
                    # for/else: runs when no upload mode failed for this file.
                    write_message("material harvested from source %s was successfully uploaded" % \
                                  (reponame,))
            # NOTE(review): this guard looks inverted — "nothing to upload"
            # is printed when files DO exist; presumably `== 0` was intended.
            if len(active_files_list) > 0:
                write_message("nothing to upload")
            write_message("upload step ended")
            if CFG_INSPIRE_SITE:
                # Launch BibIndex,Webcoll update task to show uploaded content quickly
                bibindex_params = ['-w', 'reportnumber,collection', \
                                   '-P', '6', \
                                   '-I', str(sequence_id), \
                                   '--post-process', 'bst_run_bibtask[taskname="webcoll", user="oaiharvest", P="6", c="HEP"]']
                task_low_level_submission("bibindex", "oaiharvest", *tuple(bibindex_params))
        write_message("post-harvest processes ended")
    if error_happened_p:
        if CFG_OAI_FAILED_HARVESTING_STOP_QUEUE == 0 or \
           not task_get_task_param("sleeptime") or \
           error_happened_p > 1:
            # Admin want BibSched to stop, or the task is not set to
            # run at a later date: we must stop the queue.
            write_message("An error occurred. Task is configured to stop")
            return False
        else:
            # An error happened, but it can be recovered at next run
            # (task is re-scheduled) and admin set BibSched to
            # continue even after failure.
            write_message("An error occurred, but task is configured to continue")
            if CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN:
                try:
                    # Raised on purpose so register_exception captures a
                    # traceback to mail to the admins.
                    raise InvenioOAIHarvestWarning("OAIHarvest (task #%s) failed at fully harvesting source(s) %s. BibSched has NOT been stopped, and OAIHarvest will try to recover at next run" % (task_get_task_param("task_id"), ", ".join([repo[0][6] for repo in reposlist]),))
                except InvenioOAIHarvestWarning, e:
                    register_exception(stream='warning', alert_admin=True)
            return True
    else:
        return True
def collect_identifiers(harvested_file_list):
    """Collect all OAI-PMH identifiers found in each harvested file.

    @param harvested_file_list: list of filepaths to harvested files
    @return: list of lists, one identifier-list per readable input file
    """
    identifiers_per_file = []
    for filepath in harvested_file_list:
        try:
            input_fd = open(filepath)
        except IOError:
            write_message("Error opening harvested file '%s'. Skipping.." % (filepath,))
            continue
        file_content = input_fd.read()
        input_fd.close()
        identifiers_per_file.append(REGEXP_OAI_ID.findall(file_content))
    return identifiers_per_file
def remove_duplicates(harvested_file_list):
    """
    Go through a list of harvested files and remove any duplicate records.
    """
    seen_oai_ids = []
    for filepath in harvested_file_list:
        backup_path = "%s~" % (filepath,)
        # Move the original aside so the file can be rewritten in place
        try:
            os.rename(filepath, backup_path)
        except OSError:
            write_message("Error renaming harvested file '%s'. Skipping.." % (filepath,))
            continue
        # Open the rewritten target for writing and the backup for reading
        try:
            output_fd = open(filepath, 'w')
            input_fd = open(backup_path)
        except IOError:
            write_message("Error opening harvested file '%s'. Skipping.." % (filepath,))
            continue
        original_data = input_fd.read()
        input_fd.close()
        # Copy the OAI-PMH XML header up to and including <ListRecords>
        header_end = original_data.find("<ListRecords>") + len("<ListRecords>")
        output_fd.write("%s\n" % (original_data[:header_end],))
        # Keep only records whose OAI ID was not already written (in any file)
        for record_body in REGEXP_RECORD.findall(original_data):
            id_match = REGEXP_OAI_ID.search(record_body)
            if id_match != None and id_match.group(1) not in seen_oai_ids:
                output_fd.write("<record>%s</record>\n" % (record_body,))
                seen_oai_ids.append(id_match.group(1))
        output_fd.write("</ListRecords>\n</OAI-PMH>\n")
        output_fd.close()
def add_timestamp_and_timelag(timestamp,
                              timelag):
    """Add a time lag in seconds to a given timestamp.

    @param timestamp: date string 'YYYY-MM-DD HH:MM:SS', optionally with a
        trailing fractional-seconds part (e.g. '.00') which is ignored
    @param timelag: number of seconds to add
    @return: resulting date formatted as 'YYYY-MM-DD HH:MM:SS'
    """
    # Drop any trailing fractional seconds before parsing
    cleaned = re.sub(r'\.[0-9]+$', '', timestamp)
    # Convert to Unix epoch seconds (UTC), shift, and format back
    epoch_seconds = calendar.timegm(time.strptime(cleaned,
                                                  "%Y-%m-%d %H:%M:%S"))
    return time.strftime("%Y-%m-%d %H:%M:%S",
                         time.gmtime(epoch_seconds + timelag))
def update_lastrun(index):
    """Update the 'lastrun' timestamp of a successfully harvested repository.

    @param index: id of the oaiHARVEST row to update
    @return: 1 on success, or a tuple (0, error) on failure
    """
    try:
        # Record the local time of this run as the repository's last run
        today = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        sql = 'UPDATE oaiHARVEST SET lastrun=%s WHERE id=%s'
        run_sql(sql, (today, index))
        return 1
    except StandardError, e:
        return (0, e)
def oai_harvest_get(prefix, baseurl, harvestpath,
                    fro=None, until=None, setspecs=None,
                    user=None, password=None, cert_file=None,
                    key_file=None, method="POST"):
    """
    Retrieve OAI records from given repository, with given arguments.

    @param prefix: OAI metadataPrefix to request
    @param baseurl: base URL of the repository to harvest
    @param harvestpath: path prefix for storing the harvested files
    @param fro: optional lower date limit (OAI-PMH 'from' parameter)
    @param until: optional upper date limit (OAI-PMH 'until' parameter)
    @param setspecs: space-separated OAI setSpecs to harvest
    @param user: optional HTTP auth username
    @param password: optional HTTP auth password
    @param cert_file: optional SSL client certificate file
    @param key_file: optional SSL client key file
    @param method: HTTP method, "POST" (default) or "GET"
    @return: (1, harvested_files) on success, (0, error) on failure
    """
    try:
        (addressing_scheme, network_location, path, dummy1, \
         dummy2, dummy3) = urlparse.urlparse(baseurl)
        # Use a secure connection when the repository URL is https
        secure = (addressing_scheme == "https")
        http_param_dict = {'verb': "ListRecords",
                           'metadataPrefix': prefix}
        if fro:
            http_param_dict['from'] = fro
        if until:
            http_param_dict['until'] = until
        sets = None
        if setspecs:
            sets = [oai_set.strip() for oai_set in setspecs.split(' ')]
        harvested_files = oai_harvest_getter.harvest(network_location, path, http_param_dict, method, harvestpath,
                                                     sets, secure, user, password, cert_file, key_file)
        # Drop records already written to an earlier harvested file
        remove_duplicates(harvested_files)
        return (1, harvested_files)
    except (StandardError, oai_harvest_getter.InvenioOAIRequestError), e:
        return (0, e)
def call_bibconvert(config, harvestpath, convertpath):
    """Run BibConvert on the file at 'harvestpath' with template 'config'.

    The converted output is written to the file at 'convertpath'.
    @return: tuple (exitcode, stderr output) of the conversion
    """
    exitcode, dummy, cmd_stderr = run_shell_command(
        cmd="%s/bibconvert -c %s < %s",
        args=(CFG_BINDIR, config, harvestpath),
        filename_out=convertpath)
    return (exitcode, cmd_stderr)
def call_plotextractor(active_file, extracted_file, harvested_identifier_list, \
                       downloaded_files, source_id):
    """
    Function that generates proper MARCXML containing harvested plots for
    each record.
    @param active_file: path to the currently processed file
    @param extracted_file: path to the file where the final results will be saved
    @param harvested_identifier_list: list of OAI identifiers for this active_file
    @param downloaded_files: dict of identifier -> dict mappings for downloaded material.
    @param source_id: the repository identifier
    @type source_id: integer
    @return: exitcode and any error messages as: (exitcode, err_msg)
    """
    all_err_msg = []
    exitcode = 0
    # Read in active file; try/finally guarantees the handle is released
    # even if reading fails
    recs_fd = open(active_file, 'r')
    try:
        records = recs_fd.read()
    finally:
        recs_fd.close()
    # Find all records
    record_xmls = REGEXP_RECORD.findall(records)
    updated_xml = ['<?xml version="1.0" encoding="UTF-8"?>']
    updated_xml.append('<collection>')
    # Records and identifiers are parallel lists: index them together
    for i, record_xml in enumerate(record_xmls):
        current_exitcode = 0
        identifier = harvested_identifier_list[i]
        if identifier not in downloaded_files:
            downloaded_files[identifier] = {}
        updated_xml.append("<record>")
        updated_xml.append(record_xml)
        if not oaiharvest_templates.tmpl_should_process_record_with_mode(record_xml, 'p', source_id):
            # We skip this record
            updated_xml.append("</record>")
            continue
        if "tarball" not in downloaded_files[identifier]:
            # Download the source tarball unless already fetched earlier
            current_exitcode, err_msg, tarball, dummy = \
                plotextractor_harvest(identifier, active_file, selection=["tarball"])
            if current_exitcode != 0:
                exitcode = current_exitcode
                all_err_msg.append(err_msg)
            else:
                downloaded_files[identifier]["tarball"] = tarball
        if current_exitcode == 0:
            plotextracted_xml_path = process_single(downloaded_files[identifier]["tarball"])
            if plotextracted_xml_path != None:
                # We store the path to the directory the tarball contents live
                downloaded_files[identifier]["tarball-extracted"] = os.path.split(plotextracted_xml_path)[0]
                # Read and grab MARCXML from plotextractor run
                plotsxml_fd = open(plotextracted_xml_path, 'r')
                try:
                    plotextracted_xml = plotsxml_fd.read()
                finally:
                    plotsxml_fd.close()
                re_list = REGEXP_RECORD.findall(plotextracted_xml)
                if re_list != []:
                    updated_xml.append(re_list[0])
        updated_xml.append("</record>")
    updated_xml.append('</collection>')
    # Write the amended records to the output file
    file_fd = open(extracted_file, 'w')
    try:
        file_fd.write("\n".join(updated_xml))
    finally:
        file_fd.close()
    if len(all_err_msg) > 0:
        return exitcode, "\n".join(all_err_msg)
    return exitcode, ""
def call_refextract(active_file, extracted_file, harvested_identifier_list,
                    downloaded_files, source_id):
    """
    Function that calls refextractor to extract references and attach them to
    harvested records. It will download the fulltext-pdf for each identifier
    if necessary.
    @param active_file: path to the currently processed file
    @param extracted_file: path to the file where the final results will be saved
    @param harvested_identifier_list: list of OAI identifiers for this active_file
    @param downloaded_files: dict of identifier -> dict mappings for downloaded material.
    @param source_id: the repository identifier
    @type source_id: integer
    @return: exitcode and any error messages as: (exitcode, all_err_msg)
    """
    all_err_msg = []
    exitcode = 0
    flag = ""
    if CFG_INSPIRE_SITE == 1:
        flag = "--inspire"
    # Read in active file
    recs_fd = open(active_file, 'r')
    records = recs_fd.read()
    recs_fd.close()
    # Find all record
    record_xmls = REGEXP_RECORD.findall(records)
    updated_xml = ['<?xml version="1.0" encoding="UTF-8"?>']
    updated_xml.append('<collection>')
    i = 0
    for record_xml in record_xmls:
        current_exitcode = 0
        # Records and identifiers are expected to be parallel lists
        identifier = harvested_identifier_list[i]
        i += 1
        if identifier not in downloaded_files:
            downloaded_files[identifier] = {}
        updated_xml.append("<record>")
        updated_xml.append(record_xml)
        # NOTE(review): mode 'p' (same letter as plot extraction) is checked
        # here — confirm that 'r' was not intended for the refextract step.
        if not oaiharvest_templates.tmpl_should_process_record_with_mode(record_xml, 'p', source_id):
            # We skip this record
            updated_xml.append("</record>")
            continue
        if "pdf" not in downloaded_files[identifier]:
            # Download the full-text PDF unless already fetched earlier
            current_exitcode, err_msg, dummy, pdf = \
                plotextractor_harvest(identifier, active_file, selection=["pdf"])
            if current_exitcode != 0:
                exitcode = current_exitcode
                all_err_msg.append(err_msg)
            else:
                downloaded_files[identifier]["pdf"] = pdf
        if current_exitcode == 0:
            # Run the refextract executable on the PDF and capture its output
            current_exitcode, cmd_stdout, err_msg = run_shell_command(cmd="%s/refextract %s -f '%s'" % \
                                                                      (CFG_BINDIR, flag, downloaded_files[identifier]["pdf"]))
            if err_msg != "" or current_exitcode != 0:
                exitcode = current_exitcode
                all_err_msg.append("Error extracting references from id: %s\nError:%s" % \
                                   (identifier, err_msg))
            else:
                references_xml = REGEXP_REFS.search(cmd_stdout)
                if references_xml:
                    # Inject the extracted reference fields into the record
                    updated_xml.append(references_xml.group(1))
        updated_xml.append("</record>")
    updated_xml.append('</collection>')
    # Write to file
    file_fd = open(extracted_file, 'w')
    file_fd.write("\n".join(updated_xml))
    file_fd.close()
    if len(all_err_msg) > 0:
        return exitcode, "\n".join(all_err_msg)
    return exitcode, ""
def call_authorlist_extract(active_file, extracted_file, harvested_identifier_list,
                            downloaded_files, source_id):
    """
    Function that will look in harvested tarball for any authorlists. If found
    it will extract and convert the authors using a XSLT stylesheet.
    @param active_file: path to the currently processed file
    @type active_file: string
    @param extracted_file: path to the file where the final results will be saved
    @type extracted_file: string
    @param harvested_identifier_list: list of OAI identifiers for this active_file
    @type harvested_identifier_list: list
    @param downloaded_files: dict of identifier -> dict mappings for downloaded material.
    @type downloaded_files: dict
    @param source_id: the repository identifier
    @type source_id: integer
    @return: exitcode and any error messages as: (exitcode, all_err_msg)
        NOTE(review): unlike the sibling call_* helpers, all_err_msg is
        returned as a list here rather than a joined string — confirm callers
        expect that.
    @rtype: tuple
    """
    all_err_msg = []
    exitcode = 0
    # Read in active file
    recs_fd = open(active_file, 'r')
    records = recs_fd.read()
    recs_fd.close()
    # Find all records
    record_xmls = REGEXP_RECORD.findall(records)
    updated_xml = ['<?xml version="1.0" encoding="UTF-8"?>']
    updated_xml.append('<collection>')
    i = 0
    for record_xml in record_xmls:
        current_exitcode = 0
        # Records and identifiers are expected to be parallel lists
        identifier = harvested_identifier_list[i]
        i += 1
        # NOTE(review): mode 'p' (same letter as plot extraction) is checked
        # here — confirm a dedicated authorlist mode was not intended.
        if not oaiharvest_templates.tmpl_should_process_record_with_mode(record_xml, 'p', source_id):
            # We skip this record
            updated_xml.append("<record>")
            updated_xml.append(record_xml)
            updated_xml.append("</record>")
            continue
        # Grab BibRec instance of current record for later amending
        existing_record, status_code, dummy1 = create_record("<record>%s</record>" % (record_xml,))
        if status_code == 0:
            # Parsing failed: keep the record unchanged and move on
            all_err_msg.append("Error parsing record, skipping authorlist extraction of: %s\n" % \
                               (identifier,))
            updated_xml.append("<record>%s</record>" % (record_xml,))
            continue
        if identifier not in downloaded_files:
            downloaded_files[identifier] = {}
        if "tarball" not in downloaded_files[identifier]:
            # Download the source tarball unless already fetched earlier
            current_exitcode, err_msg, tarball, dummy = \
                plotextractor_harvest(identifier, active_file, selection=["tarball"])
            if current_exitcode != 0:
                exitcode = current_exitcode
                all_err_msg.append(err_msg)
            else:
                downloaded_files[identifier]["tarball"] = tarball
        if current_exitcode == 0:
            current_exitcode, err_msg, authorlist_xml_path = authorlist_extract(downloaded_files[identifier]["tarball"], \
                                                                                identifier, downloaded_files)
            if current_exitcode != 0:
                exitcode = current_exitcode
                all_err_msg.append("Error extracting authors from id: %s\nError:%s" % \
                                   (identifier, err_msg))
            elif authorlist_xml_path is not None:
                ## Authorlist found
                # Read and create BibRec
                xml_fd = open(authorlist_xml_path, 'r')
                author_xml = xml_fd.read()
                xml_fd.close()
                authorlist_record = create_records(author_xml)
                if len(authorlist_record) == 1:
                    if authorlist_record[0][0] == None:
                        # Unparsable authorlist: the record is dropped from
                        # the output here (no updated_xml.append before the
                        # continue) — NOTE(review): confirm this is intended.
                        all_err_msg.append("Error parsing authorlist record for id: %s" % \
                                           (identifier,))
                        continue
                    authorlist_record = authorlist_record[0][0]
                # NOTE(review): if create_records() returned more than one
                # record, authorlist_record is still the full list here and
                # the field helpers below receive the wrong type — confirm.
                # Convert any LaTeX symbols in authornames
                translate_fieldvalues_from_latex(authorlist_record, '100', code='a')
                translate_fieldvalues_from_latex(authorlist_record, '700', code='a')
                # Look for any UNDEFINED fields in authorlist
                key = "UNDEFINED"
                matching_fields = record_find_matching_fields(key, authorlist_record, tag='100') \
                                  + record_find_matching_fields(key, authorlist_record, tag='700')
                if len(matching_fields) > 0 and bibcatalog_system != None:
                    # UNDEFINED found. Create ticket in author queue
                    ticketid = create_authorlist_ticket(matching_fields, identifier)
                    if ticketid:
                        write_message("authorlist RT ticket %d submitted for %s" % (ticketid, identifier))
                    else:
                        all_err_msg.append("Error while submitting RT ticket for %s" % (identifier,))
                # Replace 100,700 fields of original record with extracted fields
                record_delete_fields(existing_record, '100')
                record_delete_fields(existing_record, '700')
                first_author = record_get_field_instances(authorlist_record, '100')
                additional_authors = record_get_field_instances(authorlist_record, '700')
                record_add_fields(existing_record, '100', first_author)
                record_add_fields(existing_record, '700', additional_authors)
        updated_xml.append(record_xml_output(existing_record))
    updated_xml.append('</collection>')
    # Write to file
    file_fd = open(extracted_file, 'w')
    file_fd.write("\n".join(updated_xml))
    file_fd.close()
    if len(all_err_msg) > 0:
        return exitcode, all_err_msg
    return exitcode, ""
def call_fulltext(active_file, extracted_file, harvested_identifier_list,
                  downloaded_files, source_id):
    """
    Function that calls attach FFT tag for full-text pdf to harvested records.
    It will download the fulltext-pdf for each identifier if necessary.
    @param active_file: path to the currently processed file
    @param extracted_file: path to the file where the final results will be saved
    @param harvested_identifier_list: list of OAI identifiers for this active_file
    @param downloaded_files: dict of identifier -> dict mappings for downloaded material.
    @param source_id: the repository identifier
    @return: exitcode and any error messages as: (exitcode, err_msg)
    """
    all_err_msg = []
    exitcode = 0
    # Read in active file
    recs_fd = open(active_file, 'r')
    records = recs_fd.read()
    recs_fd.close()
    # Set doctype FIXME: Remove when parameters are introduced to post-process steps
    if CFG_INSPIRE_SITE == 1:
        doctype = "arXiv"
    elif CFG_CERN_SITE == 1:
        # NOTE(review): same empty doctype as the generic case below —
        # presumably a placeholder for a CERN-specific value; confirm.
        doctype = ""
    else:
        doctype = ""
    # Find all records
    record_xmls = REGEXP_RECORD.findall(records)
    updated_xml = ['<?xml version="1.0" encoding="UTF-8"?>']
    updated_xml.append('<collection>')
    i = 0
    for record_xml in record_xmls:
        current_exitcode = 0
        # Records and identifiers are expected to be parallel lists
        identifier = harvested_identifier_list[i]
        i += 1
        if identifier not in downloaded_files:
            downloaded_files[identifier] = {}
        updated_xml.append("<record>")
        updated_xml.append(record_xml)
        if not oaiharvest_templates.tmpl_should_process_record_with_mode(record_xml, 'p', source_id):
            # We skip this record
            updated_xml.append("</record>")
            continue
        if "pdf" not in downloaded_files[identifier]:
            # Download the full-text PDF unless already fetched earlier
            current_exitcode, err_msg, dummy, pdf = \
                plotextractor_harvest(identifier, active_file, selection=["pdf"])
            if current_exitcode != 0:
                exitcode = current_exitcode
                all_err_msg.append(err_msg)
            else:
                downloaded_files[identifier]["pdf"] = pdf
        if current_exitcode == 0:
            # Attach an FFT datafield pointing at the downloaded PDF so that
            # BibUpload attaches the full-text to the record
            fulltext_xml = """ <datafield tag="FFT" ind1=" " ind2=" ">
                    <subfield code="a">%(url)s</subfield>
                    <subfield code="t">%(doctype)s</subfield>
                 </datafield>""" % {'url': downloaded_files[identifier]["pdf"],
                                    'doctype': doctype}
            updated_xml.append(fulltext_xml)
        updated_xml.append("</record>")
    updated_xml.append('</collection>')
    # Write to file
    file_fd = open(extracted_file, 'w')
    file_fd.write("\n".join(updated_xml))
    file_fd.close()
    if len(all_err_msg) > 0:
        return exitcode, "\n".join(all_err_msg)
    return exitcode, ""
def authorlist_extract(tarball_path, identifier, downloaded_files):
    """
    Try to extract the tarball given, if not already extracted, and look for
    any XML files that could be authorlists. If any is found, use a XSLT stylesheet
    to transform the authorlist into MARCXML author-fields, and return the full path
    of resulting conversion.
    @param tarball_path: path to the tarball to check
    @type tarball_path: string
    @param identifier: OAI Identifier to the current record
    @type identifier: string
    @param downloaded_files: dict of identifier -> dict mappings for downloaded material.
    @type downloaded_files: dict
    @return: path to converted authorlist together with exitcode and any error messages as:
    (exitcode, err_msg, authorlist_path)
    @rtype: tuple
    """
    all_err_msg = []
    exitcode = 0
    if "tarball-extracted" not in downloaded_files[identifier]:
        # tarball has not been extracted
        tar_dir, dummy = get_defaults(tarball=tarball_path, sdir=CFG_TMPDIR, refno_url="")
        try:
            dummy = untar(tarball_path, tar_dir)
        except Timeout:
            all_err_msg.append("Timeout during tarball extraction of %s" % (tarball_path,))
            exitcode = 1
            return exitcode, "\n".join(all_err_msg), None
        # Remember the extraction dir so later steps can reuse it
        downloaded_files[identifier]["tarball-extracted"] = tar_dir
    # tarball is now surely extracted, so we try to fetch all XML in the folder
    xml_files_list = find_matching_files(downloaded_files[identifier]["tarball-extracted"], \
                                         ["xml"])
    # Try to convert authorlist candidates, returning on first success
    for xml_file in xml_files_list:
        xml_file_fd = open(xml_file, "r")
        xml_content = xml_file_fd.read()
        xml_file_fd.close()
        # Does this XML file look like a collaboration authorlist?
        match = REGEXP_AUTHLIST.findall(xml_content)
        if match != []:
            # Store the candidate authorlist snippet in a temporary file
            tempfile_fd, temp_authorlist_path = tempfile.mkstemp(suffix=".xml", prefix="authorlist_temp", dir=CFG_TMPDIR)
            os.write(tempfile_fd, match[0])
            os.close(tempfile_fd)
            # Generate file to store conversion results
            newfile_fd, authorlist_resultxml_path = tempfile.mkstemp(suffix=".xml", prefix="authorlist_MARCXML", \
                                                                     dir=downloaded_files[identifier]["tarball-extracted"])
            os.close(newfile_fd)
            # Convert the authorlist to MARCXML with the configured stylesheet
            exitcode, cmd_stderr = call_bibconvert(config=CFG_OAI_AUTHORLIST_POSTMODE_STYLESHEET, \
                                                   harvestpath=temp_authorlist_path, \
                                                   convertpath=authorlist_resultxml_path)
            if cmd_stderr == "" and exitcode == 0:
                # Success!
                return 0, "", authorlist_resultxml_path
    # No valid authorlist found
    return 0, "", None
def plotextractor_harvest(identifier, active_file, selection=None):
    """
    Function that calls plotextractor library to download selected material,
    i.e. tarball or pdf, for passed identifier. Returns paths to respective files.

    @param identifier: OAI identifier of the record to harvest
    @param active_file: path to the currently processed file
    @param selection: list of materials to harvest; defaults to
        ["pdf", "tarball"]. (A None sentinel is used instead of a mutable
        default argument.)
    @return: exitcode, errormessages and paths to harvested tarball and fulltexts
    (exitcode, err_msg, tarball, pdf)
    """
    if selection is None:
        selection = ["pdf", "tarball"]
    all_err_msg = []
    exitcode = 0
    active_dir, active_name = os.path.split(active_file)
    # turn oaiharvest_23_1_20110214161632_converted -> oaiharvest_23_1_material
    # to let harvested material in same folder structure
    active_name = "_".join(active_name.split('_')[:-2]) + "_material"
    extract_path = make_single_directory(active_dir, active_name)
    tarball, pdf = harvest_single(identifier, extract_path, selection)
    # Throttle between remote downloads
    time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
    if tarball == None and "tarball" in selection:
        all_err_msg.append("Error harvesting tarball from id: %s %s" % \
                           (identifier, extract_path))
        exitcode = 1
    if pdf == None and "pdf" in selection:
        all_err_msg.append("Error harvesting full-text from id: %s %s" % \
                           (identifier, extract_path))
        exitcode = 1
    return exitcode, "\n".join(all_err_msg), tarball, pdf
def find_matching_files(basedir, filetypes):
    """
    This functions tries to find all files matching given filetypes by looking at
    all the files and filenames in the given directory, including subdirectories.

    @param basedir: full path to base directory to search in
    @type basedir: string
    @param filetypes: list of filetypes, extensions
    @type filetypes: list
    @return: list of full paths to files matching any of the given filetypes
    @rtype: list
    """
    files_list = []
    for dirpath, dummy0, filenames in os.walk(basedir):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            # Ask the 'file' utility for the detected content type
            dummy1, cmd_out, dummy2 = run_shell_command('file %s', (full_path,))
            for filetype in filetypes:
                # Match either on the 'file' output or on the file extension
                if cmd_out.lower().find(filetype) > -1:
                    files_list.append(full_path)
                elif filename.split('.')[-1].lower() == filetype:
                    files_list.append(full_path)
    return files_list
def translate_fieldvalues_from_latex(record, tag, code='', encoding='utf-8'):
    """
    Given a record and field tag, this function will modify the record by
    translating the subfield values of found fields from LaTeX to chosen
    encoding for all the subfields with given code (or all if no code is given).
    @param record: record to modify, in BibRec style structure
    @type record: dict
    @param tag: tag of fields to modify
    @type tag: string
    @param code: restrict the translation to a given subfield code
    @type code: string
    @param encoding: character encoding for the new value. Defaults to UTF-8.
    @type encoding: string
    """
    field_list = record_get_field_instances(record, tag)
    for field in field_list:
        subfields = field[0]
        subfield_index = 0
        for subfield_code, subfield_value in subfields:
            if code == '' or subfield_code == code:
                # Translate LaTeX markup to Unicode, then encode as requested
                newvalue = translate_latex2unicode(subfield_value).encode(encoding)
                # field[4] is the field's global position within the record
                record_modify_subfield(record, tag, subfield_code, newvalue, \
                                       subfield_index, field_position_global=field[4])
            subfield_index += 1
def create_authorlist_ticket(matching_fields, identifier):
    """
    This function will submit a ticket generated by UNDEFINED affiliations
    in extracted authors from collaboration authorlists.
    @param matching_fields: list of (tag, field_instances) for UNDEFINED nodes
    @type matching_fields: list
    @param identifier: OAI identifier of record
    @type identifier: string
    @return: return the ID of the created ticket, or None on failure
    @rtype: int or None
    """
    # No ticketing backend configured: nothing to do
    if bibcatalog_system is None:
        return None
    subject = "[OAI Harvest] UNDEFINED affiliations for record %s" % (identifier,)
    text = """
Harvested record with identifier %(ident)s has had its authorlist extracted and contains some UNDEFINED affiliations.
To see the record, go here: %(baseurl)s/search?p=%(ident)s
If the record is not there yet, try again later. It may take some time for it to load into the system.
List of unidentified fields:
%(fields)s
""" % {
        'ident' : identifier,
        'baseurl' : CFG_SITE_URL,
        'fields' : "\n".join([field_xml_output(field, tag) for tag, field_instances in matching_fields \
                              for field in field_instances])
      }
    queue = "Authors"
    ticketid = bibcatalog_system.ticket_submit(subject=subject, queue=queue)
    # ticket_comment returning None means the comment failed; the ticket
    # itself was still created, so its id is returned regardless
    if bibcatalog_system.ticket_comment(None, ticketid, text) == None:
        write_message("Error: commenting on ticket %s failed." % (str(ticketid),))
    return ticketid
def create_oaiharvest_log(task_id, oai_src_id, marcxmlfile):
    """
    Function which creates the harvesting logs.

    @param task_id: bibupload task id
    @param oai_src_id: id of the OAI source configuration
    @param marcxmlfile: path to the uploaded MARCXML file
    """
    file_fd = open(marcxmlfile, "r")
    try:
        # read() slurps the whole file (equivalent to the old read(-1));
        # try/finally ensures the handle is closed even if reading fails
        xml_content = file_fd.read()
    finally:
        file_fd.close()
    create_oaiharvest_log_str(task_id, oai_src_id, xml_content)
def create_oaiharvest_log_str(task_id, oai_src_id, xml_content):
    """
    Function which creates the harvesting logs.

    One oaiHARVESTLOG row is inserted per record found in the given XML.
    @param task_id: bibupload task id
    @param oai_src_id: id of the OAI source configuration
    @param xml_content: the harvested MARCXML content
    """
    try:
        records = create_records(xml_content)
        for record in records:
            # record[0] holds the parsed record structure
            oai_id = record_extract_oai_id(record[0])
            query = "INSERT INTO oaiHARVESTLOG (id_oaiHARVEST, oai_id, date_harvested, bibupload_task_id) VALUES (%s, %s, NOW(), %s)"
            run_sql(query, (str(oai_src_id), str(oai_id), str(task_id)))
    except Exception, msg:
        # Logging failures are reported but never abort the harvest
        print "Logging exception : %s " % (str(msg),)
def call_bibupload(marcxmlfile, mode=None, oai_src_id= -1, sequence_id=None):
"""
Creates a bibupload task for the task scheduler in given mode
on given file. Returns the generated task id and logs the event
in oaiHARVESTLOGS, also adding any given oai source identifier.
@param marcxmlfile: base-marcxmlfilename to upload
@param mode: mode to upload in
@param oai_src_id: id of current source config
@param sequence_id: sequence-number, if relevant
@return: task_id if successful, otherwise None.
"""
if mode is None:
mode = ["-r", "-i"]
if os.path.exists(marcxmlfile):
try:
args = mode
# Add job with priority 6 (above normal bibedit tasks) and file to upload to arguments
#FIXME: allow per-harvest arguments
args.extend(["-P", "6", marcxmlfile])
if sequence_id:
args.extend(['-I', str(sequence_id)])
task_id = task_low_level_submission("bibupload", "oaiharvest", *tuple(args))
create_oaiharvest_log(task_id, oai_src_id, marcxmlfile)
except Exception, msg:
write_message("An exception during submitting oaiharvest task occured : %s " % (str(msg)))
return None
return task_id
else:
write_message("marcxmlfile %s does not exist" % (marcxmlfile,))
return None
def call_bibfilter(bibfilterprogram, marcxmlfile):
    """
    Call bibfilter program BIBFILTERPROGRAM on MARCXMLFILE, which is usually
    run before uploading records.
    The bibfilter should produce up to four files called MARCXMLFILE.insert.xml,
    MARCXMLFILE.correct.xml, MARCXMLFILE.append.xml and MARCXMLFILE.holdingpen.xml.
    The first file contains parts of MARCXML to be uploaded in insert mode,
    the second file is uploaded in correct mode, third in append mode and the last file
    contains MARCXML to be uploaded into the holding pen.
    @param bibfilterprogram: path to bibfilter script to run
    @param marcxmlfile: base-marcxmlfilename
    @return: exitcode and any error messages as: (exitcode, err_msg)
    """
    all_err_msg = []
    exitcode = 0
    if bibfilterprogram:
        if not os.path.isfile(bibfilterprogram):
            all_err_msg.append("bibfilterprogram %s is not a file" %
                               (bibfilterprogram,))
            exitcode = 1
        elif not os.path.isfile(marcxmlfile):
            all_err_msg.append("marcxmlfile %s is not a file" % (marcxmlfile,))
            exitcode = 1
        else:
            exitcode, dummy, cmd_stderr = run_shell_command(cmd="%s '%s'", \
                                                            args=(bibfilterprogram, \
                                                                  marcxmlfile))
            if exitcode != 0 or cmd_stderr != "":
                all_err_msg.append("Error while running filtering script on %s\nError:%s" % \
                                   (marcxmlfile, cmd_stderr))
    else:
        try:
            all_err_msg.append("no bibfilterprogram defined, copying %s only" %
                               (marcxmlfile,))
            # No filter configured: fall back to uploading everything in
            # insert mode by copying the file verbatim.
            shutil.copy(marcxmlfile, marcxmlfile + ".insert.xml")
        except (IOError, OSError, shutil.Error):
            # Narrowed from a bare 'except:' so that KeyboardInterrupt and
            # SystemExit are no longer swallowed; copy failures still only
            # get reported, preserving the best-effort behaviour.
            all_err_msg.append("cannot copy %s into %s.insert.xml" % (marcxmlfile, marcxmlfile))
            exitcode = 1
    return exitcode, "\n".join(all_err_msg)
def get_row_from_reposname(reposname):
""" Returns all information about a row (OAI source)
from the source name """
try:
sql = """SELECT id, baseurl, metadataprefix, arguments,
comment, bibconvertcfgfile, name, lastrun,
frequency, postprocess, setspecs,
bibfilterprogram
FROM oaiHARVEST WHERE name=%s"""
res = run_sql(sql, (reposname,))
reposdata = []
for element in res:
reposdata.append(element)
return reposdata
except StandardError, e:
return (0, e)
def get_all_rows_from_db():
""" This method retrieves the full database of repositories and returns
a list containing (in exact order):
| id | baseurl | metadataprefix | arguments | comment
| bibconvertcfgfile | name | lastrun | frequency
| postprocess | setspecs | bibfilterprogram
"""
try:
reposlist = []
sql = """SELECT id FROM oaiHARVEST"""
idlist = run_sql(sql)
for index in idlist:
sql = """SELECT id, baseurl, metadataprefix, arguments,
comment, bibconvertcfgfile, name, lastrun,
frequency, postprocess, setspecs,
bibfilterprogram
FROM oaiHARVEST WHERE id=%s""" % index
reposelements = run_sql(sql)
repos = []
for element in reposelements:
repos.append(element)
reposlist.append(repos)
return reposlist
except StandardError, e:
return (0, e)
def compare_timestamps_with_tolerance(timestamp1,
                                      timestamp2,
                                      tolerance=0):
    """Compare two 'YYYY-MM-DD HH:MM:SS' timestamps within a tolerance.

    @param timestamp1: first timestamp, e.g. '2005-03-31 17:37:26'
    @param timestamp2: second timestamp, same format
    @param tolerance: allowed difference, in seconds
    @return: -1 if timestamp1 < timestamp2 - tolerance, 1 if
        timestamp1 > timestamp2 + tolerance, and 0 otherwise
    """
    def _to_epoch_seconds(timestamp):
        # strip any trailing fractional part such as '.00' before parsing
        cleaned = re.sub(r'\.[0-9]+$', '', timestamp)
        return calendar.timegm(time.strptime(cleaned, "%Y-%m-%d %H:%M:%S"))

    delta = _to_epoch_seconds(timestamp1) - _to_epoch_seconds(timestamp2)
    if delta < -tolerance:
        return -1
    if delta > tolerance:
        return 1
    return 0
def get_dates(dates):
    """Validate and process the dates input by the user at the command line.

    @param dates: string of the form 'yyyy-mm-dd:yyyy-mm-dd', or None/empty
    @return: list [fromdate, untildate] on success, otherwise None
    """
    twodates = []
    if dates:
        datestring = dates.split(":")
        if len(datestring) == 2:
            for date in datestring:
                ### perform some checks on the date format
                datechunks = date.split("-")
                if len(datechunks) == 3:
                    try:
                        if int(datechunks[0]) and int(datechunks[1]) and \
                                int(datechunks[2]):
                            twodates.append(date)
                        else:
                            # A zero component (e.g. '2005-00-01') previously
                            # fell through without appending and caused an
                            # IndexError at the final check below; reject it
                            # explicitly instead.
                            write_message("Dates have invalid format, not "
                                          "'yyyy-mm-dd:yyyy-mm-dd'")
                            return None
                    except StandardError:
                        write_message("Dates have invalid format, not "
                                      "'yyyy-mm-dd:yyyy-mm-dd'")
                        twodates = None
                        return twodates
                else:
                    write_message("Dates have invalid format, not "
                                  "'yyyy-mm-dd:yyyy-mm-dd'")
                    twodates = None
                    return twodates
            ## final check.. date1 must me smaller than date2
            date1 = str(twodates[0]) + " 01:00:00"
            date2 = str(twodates[1]) + " 01:00:00"
            if compare_timestamps_with_tolerance(date1, date2) != -1:
                write_message("First date must be before second date.")
                twodates = None
        else:
            write_message("Dates have invalid format, not "
                          "'yyyy-mm-dd:yyyy-mm-dd'")
            twodates = None
    else:
        twodates = None
    return twodates
def get_repository_names(repositories):
    """Validate and process repository names given on the command line.

    @param repositories: comma-separated names; individual names may be
        wrapped in single or double quotes
    @return: list of cleaned names, or None when nothing was given
    """
    if not repositories:
        return None
    repository_names = []
    for raw_name in repositories.split(","):
        ### take into account both single word names and multiple word
        ### names (which get wrapped around "" or '')
        cleaned = raw_name.strip()
        if cleaned.startswith("'"):
            cleaned = cleaned.strip("'")
        elif cleaned.startswith('"'):
            cleaned = cleaned.strip('"')
        repository_names.append(cleaned)
    return repository_names
def usage(exitcode=0, msg=""):
    """Print info on stderr and exit; used only in 'manual' harvesting mode."""
    sys.stderr.write("*Manual single-shot harvesting mode*\n")
    if msg:
        sys.stderr.write(msg + "\n")
    sys.exit(exitcode)
@with_app_context
def main():
    """Starts the tool.

    If the command line arguments are those of the 'manual' mode, then
    starts a manual one-time harvesting. Else trigger a BibSched task
    for automated harvesting based on the OAIHarvest admin settings.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:
        opts, args = getopt.getopt(sys.argv[1:], "o:v:m:p:i:s:f:u:r:x:c:k:w:l:",
                                   ["output=",
                                    "verb=",
                                    "method=",
                                    "metadataPrefix=",
                                    "identifier=",
                                    "set=",
                                    "from=",
                                    "until=",
                                    "resumptionToken=",
                                    "certificate=",
                                    "key=",
                                    "user=",
                                    "password="]
                                   )
        # So everything went smoothly: start harvesting in manual mode
        if len([opt for opt, opt_value in opts if opt in ['-v', '--verb']]) > 0:
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []
            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    # only GET/POST are accepted; anything else keeps POST
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    # NOTE(review): "-V"/"--version" is not declared in
                    # the getopt spec above, so this branch looks
                    # unreachable -- confirm before relying on it.
                    print __revision__
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)
            if len(args) > 0:
                # the last positional argument is the base URL to harvest
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path, dummy1, \
                 dummy2, dummy3) = urlparse.urlparse(base_url)
                secure = (addressing_scheme == "https")
                if (cert_file and not key_file) or \
                   (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")
                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    # prompt interactively for the missing password
                    if not secure:
                        sys.stderr.write("*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt, error:
                        sys.stderr.write("\n%s\n" % (error,))
                        sys.exit(0)
                oai_harvest_getter.harvest(network_location, path,
                                           http_param_dict, method,
                                           output, sets, secure, user,
                                           password, cert_file,
                                           key_file)
                sys.stderr.write("Harvesting completed at: %s\n\n" %
                                 time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            if len(args) > 1 or \
               (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error, e:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass
    # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.
    task_set_option("repository", None)
    task_set_option("dates", None)
    task_init(authorization_action='runoaiharvest',
              authorization_msg="oaiharvest Task Submission",
              description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
 - Manual harvesting retrieves records from the specified URL,
   with the specified OAI arguments. Harvested records are displayed
   on the standard output or saved to a file, but are not integrated
   into the repository. This mode is useful to 'play' with OAI
   repositories or to build special harvesting scripts.
 - Automatic harvesting relies on the settings defined in the OAI
   Harvest admin interface to periodically retrieve the repositories
   and sets to harvest. It also take care of harvesting only new or
   modified records. Records harvested using this mode are converted
   and integrated into the repository, according to the settings
   defined in the OAI Harvest admin interface.
Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",
              help_specific_usage='Manual single-shot harvesting mode:\n'
              '  -o, --output         specify output file\n'
              '  -v, --verb           OAI verb to be executed\n'
              '  -m, --method         http method (default POST)\n'
              '  -p, --metadataPrefix metadata format\n'
              '  -i, --identifier     OAI identifier\n'
              '  -s, --set            OAI set(s). Whitespace-separated list\n'
              '  -r, --resuptionToken Resume previous harvest\n'
              '  -f, --from           from date (datestamp)\n'
              '  -u, --until          until date (datestamp)\n'
              '  -c, --certificate    path to public certificate (in case of certificate-based harvesting)\n'
              '  -k, --key            path to private key (in case of certificate-based harvesting)\n'
              '  -l, --user           username (in case of password-protected harvesting)\n'
              '  -w, --password       password (in case of password-protected harvesting)\n'
              'Automatic periodical harvesting mode:\n'
              '  -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
              '  -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n',
              version=__revision__,
              specific_params=("r:d:", ["repository=", "dates=", ]),
              task_submit_elaborate_specific_parameter_fnc=
              task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Elaborate specific cli parameters for oaiharvest.

    @param key: option name (e.g. "-r" or "--dates")
    @param value: option value from the command line
    @return: True when the option was recognised, False otherwise
    @raise StandardError: when a --dates value does not validate
    """
    if key in ("-r", "--repository"):
        task_set_option('repository', get_repository_names(value))
        return True
    if key in ("-d", "--dates"):
        task_set_option('dates', get_dates(value))
        # a non-empty value that parsed to None means bad input
        if value is not None and task_get_option("dates") is None:
            raise StandardError("Date format not valid.")
        return True
    return False
### okay, here we go:
# Script entry point: main() dispatches between manual (one-shot) and
# BibSched (periodic) harvesting based on the command-line arguments.
if __name__ == '__main__':
    main()
diff --git a/invenio/legacy/oairepository/admin.py b/invenio/legacy/oairepository/admin.py
index 29b11380a..9ff5e974d 100644
--- a/invenio/legacy/oairepository/admin.py
+++ b/invenio/legacy/oairepository/admin.py
@@ -1,864 +1,864 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Invenio OAI Repository Administrator Interface."""
__revision__ = "$Id$"
import cgi
import os
from invenio.config import \
CFG_SITE_LANG, \
CFG_TMPDIR, \
CFG_SITE_URL
import invenio.access_control_engine as access_manager
from invenio.utils.url import create_html_link
from invenio.dbquery import run_sql
from invenio.oai_repository_updater import parse_set_definition
from invenio.base.i18n import gettext_set_language
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.oai_repository_config import CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
import invenio.template
oaiharvest_templates = invenio.template.load('oai_harvest')
webstyle_templates = invenio.template.load('webstyle')
tmppath = CFG_TMPDIR + '/oairepositoryadmin.' + str(os.getpid())
guideurl = "help/admin/oairepository-admin-guide"
oai_rep_admin_url = CFG_SITE_URL + \
"/admin/oairepository/oairepositoryadmin.py"
def getnavtrail(previous = '', ln = CFG_SITE_LANG):
    """Return the admin navigation trail HTML for the OAI pages.

    @param previous: extra trail element(s) to append
    @param ln: interface language
    """
    return oaiharvest_templates.tmpl_getnavtrail(previous=previous, ln=ln)
def perform_request_index(ln=CFG_SITE_LANG):
    """OAI Repository admin index.

    Lists all OAI set definitions with their delete/edit/touch actions
    and a link to add a new definition.

    @param ln: interface language
    @return: HTML of the page body
    """
    out = '''<p>Define below the sets to expose through the OAI harvesting
    protocol. <br /> You will have to run the
    <a href="%(siteurl)s/help/admin/oairepository-admin-guide?ln=%(ln)s#2.2"><code>oairepositoryupdater</code></a>
    utility to apply the settings you have defined here.</p>''' % {'siteurl': CFG_SITE_URL,
                                                                   'ln': ln}
    # one trailing empty header per action column (delete/edit/touch)
    header = ['id', 'setSpec',
              'setName', 'collection',
              'p1', 'f1', 'm1', 'op1',
              'p2', 'f2', 'm2', 'op2',
              'p3', 'f3', 'm3', '', '', '']
    oai_set = get_oai_set()
    sets = []
    for (id, setSpec, setName, setCollection,
         dummy_setDescription, p1, f1, m1, p2, f2, m2,
         p3, f3, m3, op1, op2) in oai_set:
        del_request = '<a href="' + CFG_SITE_URL + "/" + \
                      "admin/oairepository/oairepositoryadmin.py/delset?ln=" + \
                      ln + "&amp;oai_set_id=" + str(id) + '">delete</a>'
        edit_request = '<a href="' + CFG_SITE_URL + "/" + \
                       "admin/oairepository/oairepositoryadmin.py/editset?ln=" + \
                       ln + "&amp;oai_set_id=" + str(id) + '">edit</a>'
        # BUGFIX: the touch link used to be assigned to edit_request,
        # clobbering the edit action; keep it in its own column.
        touch_request = '<a href="' + CFG_SITE_URL + "/" + \
                        "admin/oairepository/oairepositoryadmin.py/touchset?ln=" + \
                        ln + "&amp;oai_set_id=" + str(id) + '">touch</a>'
        sets.append([id, cgi.escape(setSpec), cgi.escape(setName),
                     cgi.escape(setCollection),
                     cgi.escape(p1), f1, m1, op1,
                     cgi.escape(p2), f2, m2, op2,
                     cgi.escape(p3), f3, m3,
                     del_request, edit_request, touch_request])
    add_request = '<a href="' + CFG_SITE_URL + "/" + \
                  "admin/oairepository/oairepositoryadmin.py/addset?ln=" + \
                  ln + '">Add new OAI set definition</a>'
    sets.append(['', add_request, '', '', '', '', '',
                 '', '', '', '', '', '', '', '', '', '', ''])
    out += transform_tuple(header=header, tuple=sets)
    out += "<br /><br />"
    return out
def perform_request_addset(oai_set_name='', oai_set_spec='',
                           oai_set_collection='',
                           oai_set_description='',
                           oai_set_p1='', oai_set_f1='', oai_set_m1='',
                           oai_set_p2='', oai_set_f2='',
                           oai_set_m2='', oai_set_p3='',
                           oai_set_f3='', oai_set_m3='',
                           oai_set_op1='a', oai_set_op2='a',
                           ln=CFG_SITE_LANG, func=0):
    """add a new OAI set

    @param func: "0"/0 renders the empty input form, "1"/1 stores the
        submitted definition via add_oai_set()
    @param ln: interface language
    @return: HTML of the page body, wrapped in nice_box()
    """
    _ = gettext_set_language(ln)
    out = ""
    if func in ["0", 0]:
        text = input_form(oai_set_name, oai_set_spec,
                          oai_set_collection,
                          oai_set_p1, oai_set_f1, oai_set_m1,
                          oai_set_p2, oai_set_f2, oai_set_m2,
                          oai_set_p3, oai_set_f3, oai_set_m3,
                          oai_set_op1, oai_set_op2)
        out = createform(action="addset",
                         text=text,
                         ln=ln,
                         button="Add new OAI set definition line",
                         func=1)
    if func in ["1", 1]:
        out += "<br />"
        res = add_oai_set(oai_set_name, oai_set_spec,
                          oai_set_collection, oai_set_description,
                          oai_set_p1, oai_set_f1, oai_set_m1,
                          oai_set_p2, oai_set_f2, oai_set_m2,
                          oai_set_p3, oai_set_f3, oai_set_m3,
                          oai_set_op1, oai_set_op2)
        if res[0] == 1:
            out += oaiharvest_templates.tmpl_print_info(ln,
                                                        "OAI set definition %s added." % \
                                                        cgi.escape(oai_set_name))
            out += "<br />"
        else:
            # Consistency fix: report failures the same way
            # perform_request_editset() does instead of silently
            # rendering nothing.
            out += webstyle_templates.tmpl_write_warning(
                "A problem was encountered: <br/>" + cgi.escape(str(res[1])))
            out += "<br />"
        out += "<br /><br />"
        out += create_html_link(urlbase=oai_rep_admin_url + \
                                "/index",
                                urlargd={'ln': ln},
                                link_label=_("Return to main selection"))
    return nice_box("", out)
def perform_request_editset(oai_set_id=None, oai_set_name='',
                            oai_set_spec='', oai_set_collection='',
                            oai_set_description='',
                            oai_set_p1='', oai_set_f1='',
                            oai_set_m1='', oai_set_p2='',
                            oai_set_f2='', oai_set_m2='',
                            oai_set_p3='', oai_set_f3='',
                            oai_set_m3='', oai_set_op1='a',
                            oai_set_op2='a', ln=CFG_SITE_LANG,
                            func=0):
    """creates html form to edit an OAI set.

    @param oai_set_id: id of the set to edit (required)
    @param func: "0"/0 shows the form pre-filled from the database,
        "1"/1 applies the submitted values via modify_oai_set()
    @param ln: interface language
    @return: HTML of the page body wrapped in nice_box(), or a plain
        error string when oai_set_id is missing or invalid
    """
    _ = gettext_set_language(ln)
    if oai_set_id is None:
        return "No OAI set ID selected."
    out = ""
    if func in [0, "0"]:
        oai_set = get_oai_set(oai_set_id)
        if not oai_set:
            return "ERROR: oai_set_id %s seems invalid" % oai_set_id
        # pre-fill the form from the stored row
        # (row layout as produced by get_oai_set())
        oai_set_spec = oai_set[0][1]
        oai_set_name = oai_set[0][2]
        oai_set_collection = oai_set[0][3]
        oai_set_description = oai_set[0][4]
        oai_set_p1 = oai_set[0][5]
        oai_set_f1 = oai_set[0][6]
        oai_set_m1 = oai_set[0][7]
        oai_set_p2 = oai_set[0][8]
        oai_set_f2 = oai_set[0][9]
        oai_set_m2 = oai_set[0][10]
        oai_set_p3 = oai_set[0][11]
        oai_set_f3 = oai_set[0][12]
        oai_set_m3 = oai_set[0][13]
        oai_set_op1 = oai_set[0][14]
        oai_set_op2 = oai_set[0][15]
        text = input_form(oai_set_name,
                          oai_set_spec,
                          oai_set_collection,
                          oai_set_p1,
                          oai_set_f1,
                          oai_set_m1,
                          oai_set_p2,
                          oai_set_f2,
                          oai_set_m2,
                          oai_set_p3,
                          oai_set_f3,
                          oai_set_m3,
                          oai_set_op1,
                          oai_set_op2)
        out += extended_input_form(action="editset",
                                   text=text,
                                   button="Modify",
                                   oai_set_id=oai_set_id,
                                   ln=ln,
                                   func=1)
    if func in [1, "1"]:
        res = modify_oai_set(oai_set_id,
                             oai_set_name,
                             oai_set_spec,
                             oai_set_collection,
                             oai_set_description,
                             oai_set_p1,
                             oai_set_f1,
                             oai_set_m1,
                             oai_set_p2,
                             oai_set_f2,
                             oai_set_m2,
                             oai_set_p3,
                             oai_set_f3,
                             oai_set_m3,
                             oai_set_op1,
                             oai_set_op2)
        out += "<br />"
        if res[0] == 1:
            out += oaiharvest_templates.tmpl_print_info(ln, "OAI set definition #%s edited." % oai_set_id)
            out += "<br />"
        else:
            # modify_oai_set() reports failures as (0, message)
            out += webstyle_templates.tmpl_write_warning("A problem was encountered: <br/>" + cgi.escape(res[1]))
            out += "<br />"
    out += "<br />"
    out += create_html_link(urlbase=oai_rep_admin_url + \
                            "/index",
                            urlargd={'ln': ln},
                            link_label=_("Return to main selection"))
    return nice_box("", out)
def perform_request_delset(oai_set_id=None, ln=CFG_SITE_LANG, func=0):
    """creates html form to delete an OAI set

    @param oai_set_id: id of the set to delete
    @param func: "0"/0 shows a confirmation form summarising the set,
        "1"/1 performs the deletion via delete_oai_set()
    @param ln: interface language
    @return: HTML of the page body wrapped in nice_box(), or a plain
        error/info string
    """
    _ = gettext_set_language(ln)
    out = ""
    if oai_set_id:
        oai_set = get_oai_set(oai_set_id)
        if not oai_set:
            return "ERROR: oai_set_id %s seems invalid" % oai_set_id
        if func in ["0", 0]:
            # re-read the row and unpack its fields for the summary
            # (row layout as produced by get_oai_set())
            oai_set = get_oai_set(oai_set_id)
            oai_set_spec = oai_set[0][1]
            oai_set_name = oai_set[0][2]
            oai_set_collection = oai_set[0][3]
            oai_set_p1 = oai_set[0][5]
            oai_set_f1 = oai_set[0][6]
            oai_set_m1 = oai_set[0][7]
            oai_set_p2 = oai_set[0][8]
            oai_set_f2 = oai_set[0][9]
            oai_set_m2 = oai_set[0][10]
            oai_set_p3 = oai_set[0][11]
            oai_set_f3 = oai_set[0][12]
            oai_set_m3 = oai_set[0][13]
            oai_set_op1 = oai_set[0][14]
            oai_set_op2 = oai_set[0][15]
            if oai_set:
                question = """Do you want to delete the OAI definition #%s?""" % oai_set_id
                text = oaiharvest_templates.tmpl_print_info(ln, question)
                text += "<br /><br /><br />"
                # show a dash-separated summary of the definition
                text += pagebody_text(
                    cgi.escape("%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s-%s" % \
                               (oai_set_spec,
                                oai_set_name,
                                oai_set_collection,
                                oai_set_p1,
                                oai_set_f1,
                                oai_set_m1,
                                oai_set_op1,
                                oai_set_p2,
                                oai_set_f2,
                                oai_set_m2,
                                oai_set_op2,
                                oai_set_p3,
                                oai_set_f3,
                                oai_set_m3)))
                out += createform(action="delset",
                                  text=text,
                                  button="Delete",
                                  oai_set_id=oai_set_id,
                                  func=1)
            else:
                return oaiharvest_templates.tmpl_print_info(ln, "OAI set does not exist.")
        elif func in ["1", 1]:
            res = delete_oai_set(oai_set_id)
            if res[0] == 1:
                out += oaiharvest_templates.tmpl_print_info(ln, "OAI set definition #%s deleted." % oai_set_id)
                out += "<br />"
            else:
                # deletion failure is silently ignored here
                pass
    out += "<br /><br />"
    out += create_html_link(urlbase=oai_rep_admin_url + \
                            "/index",
                            urlargd={'ln': ln},
                            link_label=_("Return to main selection"))
    return nice_box("", out)
def perform_request_touchset(oai_set_id=None, ln=CFG_SITE_LANG, func=0):
    """creates html form to touch an OAI set

    Touching updates the set's last_updated timestamp so that all of
    its records get re-exported (see touch_oai_set()).

    @param oai_set_id: id of the set to touch
    @param func: "0"/0 shows a confirmation form, "1"/1 performs the touch
    @param ln: interface language
    @return: HTML of the page body wrapped in nice_box(), or a plain
        error string when oai_set_id is invalid
    """
    _ = gettext_set_language(ln)
    out = ""
    if oai_set_id:
        # fetch the set once (the previous code queried it twice and
        # unpacked a dozen unused fields)
        oai_set = get_oai_set(oai_set_id)
        if not oai_set:
            return "ERROR: oai_set_id %s seems invalid" % oai_set_id
        oai_set_spec = oai_set[0][1]
        if func in ["0", 0]:
            question = _("""Do you want to touch the OAI set %s? Note that this will force all clients to re-harvest the whole set.""") % cgi.escape(oai_set_spec)
            text = oaiharvest_templates.tmpl_print_info(ln, question)
            out += createform(action="touchset",
                              text=text,
                              button="Touch",
                              oai_set_id=oai_set_id,
                              func=1)
        elif func in ["1", 1]:
            touch_oai_set(oai_set_spec)
            out += oaiharvest_templates.tmpl_print_info(ln, _("OAI set %s touched.") % cgi.escape(oai_set_spec))
            out += "<br />"
    out += "<br /><br />"
    out += create_html_link(urlbase=oai_rep_admin_url + \
                            "/index",
                            urlargd={'ln': ln},
                            link_label=_("Return to main selection"))
    return nice_box("", out)
def get_oai_set(id=''):
"""Returns a row parameters for a given id"""
sets = []
sql = "SELECT id, setSpec, setName, setCollection, setDescription, p1,f1,m1, p2,f2,m2, p3,f3,m3, setDefinition FROM oaiREPOSITORY"
try:
if id:
sql += " WHERE id=%s" % id
sql += " ORDER BY setSpec asc"
res = run_sql(sql)
for row in res:
set = ['']*16
set[0] = row[0]
set[1] = row[1]
set[2] = row[2]
params = parse_set_definition(row[14])
set[3] = params.get('c', '')
set[5] = params.get('p1', '')
set[6] = params.get('f1', '')
set[7] = params.get('m1', '')
set[8] = params.get('p2', '')
set[9] = params.get('f2', '')
set[10] = params.get('m2', '')
set[11] = params.get('p3', '')
set[12] = params.get('f3', '')
set[13] = params.get('m3', '')
set[14] = params.get('op1', 'a')
set[15] = params.get('op2', 'a')
sets.append(set)
return sets
except StandardError, e:
register_exception(alert_admin=True)
return str(e)
def touch_oai_set(setSpec):
    """
    Updates the last_updated timestamp of an oai_set. This will cause
    any record belonging to it to be actually re-exported. This is
    useful in case e.g. the format template used to generate an
    export has been amended.

    Note: the last_updated time is in localtime to the server.

    @param setSpec: setSpec value identifying the set to touch
    """
    run_sql("UPDATE oaiREPOSITORY SET last_updated=NOW() WHERE setSpec=%s", (setSpec, ))
def modify_oai_set(oai_set_id, oai_set_name, oai_set_spec,
oai_set_collection, oai_set_description,
oai_set_p1, oai_set_f1,oai_set_m1, oai_set_p2,
oai_set_f2, oai_set_m2, oai_set_p3, oai_set_f3,
oai_set_m3, oai_set_op1, oai_set_op2):
"""Modifies a row's parameters"""
try:
if not oai_set_spec:
oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
set_definition = 'c=' + oai_set_collection + ';' + \
'p1=' + oai_set_p1 + ';' + \
'f1=' + oai_set_f1 + ';' + \
'm1=' + oai_set_m1 + ';' + \
'op1='+ oai_set_op1 + ';' + \
'p2=' + oai_set_p2 + ';' + \
'f2=' + oai_set_f2 + ';' + \
'm2=' + oai_set_m2 + ';' + \
'op2='+ oai_set_op2 + ';' + \
'p3=' + oai_set_p3 + ';' + \
'f3=' + oai_set_f3 + ';' + \
'm3=' + oai_set_m3 + ';'
run_sql("""UPDATE oaiREPOSITORY SET
setName=%s,
setSpec=%s,
setCollection=%s,
setDescription=%s,
setDefinition=%s,
p1=%s,
f1=%s,
m1=%s,
p2=%s,
f2=%s,
m2=%s,
p3=%s,
f3=%s,
m3=%s
WHERE id=%s""",
(oai_set_name,
oai_set_spec,
oai_set_collection,
oai_set_description,
set_definition,
oai_set_p1,
oai_set_f1,
oai_set_m1,
oai_set_p2,
oai_set_f2,
oai_set_m2,
oai_set_p3,
oai_set_f3,
oai_set_m3,
oai_set_id))
return (1, "")
except StandardError, e:
register_exception(alert_admin=True)
return (0, str(e))
def add_oai_set(oai_set_name, oai_set_spec, oai_set_collection,
oai_set_description, oai_set_p1, oai_set_f1,oai_set_m1,
oai_set_p2, oai_set_f2,oai_set_m2, oai_set_p3,
oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2):
"""Add a definition into the OAI Repository"""
try:
if not oai_set_spec:
oai_set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
set_definition = 'c=' + oai_set_collection + ';' + \
'p1=' + oai_set_p1 + ';' + \
'f1=' + oai_set_f1 + ';' + \
'm1=' + oai_set_m1 + ';' + \
'op1='+ oai_set_op1 + ';' + \
'p2=' + oai_set_p2 + ';' + \
'f2=' + oai_set_f2 + ';' + \
'm2=' + oai_set_m2 + ';' + \
'op2='+ oai_set_op2 + ';' + \
'p3=' + oai_set_p3 + ';' + \
'f3=' + oai_set_f3 + ';' + \
'm3=' + oai_set_m3 + ';'
run_sql("""INSERT INTO oaiREPOSITORY (id, setName, setSpec,
setCollection, setDescription, setDefinition,
setRecList, p1, f1, m1, p2, f2, m2, p3, f3, m3)
VALUES (0, %s, %s, %s, %s, %s, NULL, %s, %s, %s,
%s, %s, %s, %s, %s, %s)""",
(oai_set_name, oai_set_spec, oai_set_collection,
oai_set_description, set_definition, oai_set_p1,
oai_set_f1, oai_set_m1, oai_set_p2, oai_set_f2,
oai_set_m2, oai_set_p3, oai_set_f3, oai_set_m3))
return (1, "")
except StandardError, e:
register_exception(alert_admin=True)
return (0, e)
def delete_oai_set(oai_set_id):
    """Delete the OAI set with the given id from oaiREPOSITORY.

    @param oai_set_id: id of the row to delete
    @return: (1, "") on success, (0, exception) on DB error
    """
    try:
        run_sql("DELETE FROM oaiREPOSITORY WHERE id=%s", (oai_set_id,))
        return (1, "")
    except StandardError, e:
        register_exception(alert_admin=True)
        return (0, e)
def drop_down_menu(boxname, content):
    """
    Returns the code of a drop down menu.

    Parameters:

         boxname - *str* name of the input form

         content - *list(tuple3)* the content of the list. List of items
                   as tuple3 with:
                    - *str* value of the item
                    - *bool* if item is selected of not
                    - *str* label of the item (displayed value)
    """
    pieces = ["<select name=\"%s\">" % boxname]
    for (value, selectedflag, txt) in content:
        # value is interpolated on its own to keep the historical
        # %-formatting semantics (a 1-tuple value is unpacked)
        option = "<option value=\"" + "%s\"" % value
        if selectedflag:
            option += ' selected="selected"'
        option += ">%s</option>" % txt
        pieces.append(option)
    pieces.append("</select>")
    return "".join(pieces)
def create_drop_down_menu_content(sql):
    """
    Create the content to be used in the drop_down_menu(..) function
    from an SQL statement

    @param sql: SELECT statement executed via run_sql()
    @return: list of [value, selectedflag, label] triplets
    """
    content = []
    res = run_sql(sql)
    for item in res:
        tmp_list = []
        # NOTE(review): 'item' is a whole result row (a tuple), not a
        # single column value; drop_down_menu() later relies on
        # "%s" % value unpacking a 1-tuple, so the queries used here
        # presumably select exactly one column -- confirm.
        tmp_list.append(item)
        tmp_list.append("")
        tmp_list.append(item)
        content.append(tmp_list)
    return content
def createform(action="", text="", button="func", cnfrm='', **hidden):
    """Build a small POST form around *text*.

    @param action: form action URL
    @param text: HTML snippet placed inside the form
    @param button: label of the submit button
    @param cnfrm: when truthy, add a 'func' confirmation checkbox
    @param hidden: extra hidden inputs; a list value yields one hidden
        input per list element
    @return: the form markup
    """
    chunks = ['<form action="%s" method="post">\n' % (action, ), text]
    if cnfrm:
        chunks.append(' <input type="checkbox" name="func" value="1"/>')
    for key, val in hidden.items():
        values = val if type(val) is list else [val]
        for value in values:
            chunks.append(' <input type="hidden" name="%s" value="%s"/>\n' % (key, value))
    chunks.append(' <input class="adminbutton" type="submit" value="%s"/>\n' % (button, ))
    chunks.append('</form>\n')
    return ''.join(chunks)
def input_text(title, name, value):
    """Render a labelled one-line text input.

    @param title: label shown next to the field (inserted unescaped)
    @param name: name attribute of the input (HTML-escaped)
    @param value: pre-filled value of the input (HTML-escaped)
    @return: table markup holding the label and the input
    """
    if name is None:
        name = ""
    if value is None:
        value = ""
    markup = """<table><tr><td width="100%%"><span class="adminlabel">%s</span></td>""" % title
    markup += """<td align="left">
                <input class="admin_w200" type="text" name="%s" value="%s" />
                </td></tr></table>""" % \
             (cgi.escape(name, 1), cgi.escape(value, 1))
    return markup
def pagebody_text(title):
    """Wrap *title* in the 'admintd' page-body span."""
    return """<span class="admintd">%s</span>""" % title
def bar_text(title):
    """Wrap *title* in the 'adminlabel' span."""
    return """<span class="adminlabel">%s</span>""" % title
def input_form(oai_set_name, oai_set_spec, oai_set_collection,
               oai_set_p1, oai_set_f1, oai_set_m1,
               oai_set_p2, oai_set_f2, oai_set_m2, oai_set_p3,
               oai_set_f3, oai_set_m3, oai_set_op1, oai_set_op2):
    """returns the standard settings form

    Renders the set spec/name/collection inputs plus three
    phrase/field/mode rows combined by two operator drop-downs.

    @return: HTML table markup for the form body (no <form> wrapper)
    """
    # search-mode codes as used in the p/f/m set definition
    modes = {
        'r' : 'Regular Expression',
        'a' : 'All of the words',
        'y' : 'Any of the words',
        'e' : 'Exact phrase',
        'p' : 'Partial phrase'
        }
    mode_dropdown = [['r', '', modes['r']],
                     ['e', '', modes['e']],
                     ['p', '', modes['p']],
                     ['a', '', modes['a']],
                     ['y', '', modes['y']],
                     ['', '', '']]
    # boolean operators combining the three query rows
    operators = {
        'a' : 'AND',
        'o' : 'OR',
        'n' : 'AND NOT',
        }
    mode_operators_1 = [['a', '', operators['a']],
                        ['o', '', operators['o']],
                        ['n', '', operators['n']],
                        ['a', '', '']]
    mode_operators_2 = [['a', '', operators['a']],
                        ['o', '', operators['o']],
                        ['n', '', operators['n']],
                        ['a', '', '']]
    text = "<br />"
    text += "<table><tr><td>"
    text += input_text(title = "OAI Set spec:",
                       name = "oai_set_spec", value = oai_set_spec)
    text += '</td><td colspan="3"><small><small><em>Optional: if you leave it blank it will be automatically set to "%s", with the implicit convention that any record belonging to it can be harvested by not specifying any set.</em> [<a href="http://www.openarchives.org/OAI/openarchivesprotocol.html#set" target="_blank">?</a>]</small></small>' % CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
    text += "</td></tr><tr><td>"
    text += input_text(title = "OAI Set name:",
                       name = "oai_set_name", value = oai_set_name)
    text += '</td><td colspan="3"><small><small><em>Optional: leave blank if not needed</em> [<a href="http://www.openarchives.org/OAI/openarchivesprotocol.html#Set" target="_blank">?</a>]</small></small>'
    text += "</td></tr><tr><td>&nbsp;</td></tr><tr><td>"
    text += '</td></tr><tr><td colspan="4">Choose below the search query that defines the records that belong to this set:</td></tr><tr><td>'
    text += "</td></tr><tr><td>&nbsp;</td></tr><tr><td>"
    # text += input_text(title = "OAI Set description", name = "oai_set_description", value = oai_set_description)
    #text += "</td><td colspan=2>"
    #menu = create_drop_down_menu_content("SELECT distinct(name) from collection")
    #menu.append(['','',''])
    #if (oai_set_collection):
    #    menu.append([oai_set_collection,'selected',oai_set_collection])
    #else:
    #    menu.append(['','selected','Collection'])
    text += input_text(title = "Collection(s):",
                       name="oai_set_collection",
                       value=oai_set_collection)
    #text += drop_down_menu("oai_set_collection", menu)
    text += '</td><td colspan="3"><small><small>Eg:</small> <code>Published Articles, Preprints, Theses</code><br/><small><em>(collections <b>identifiers</b>, not collections names/translations).</em></small></small></td></tr><tr><td>'
    # --- first phrase/field/mode row ---
    text += input_text(title = "Phrase:", name =
                       "oai_set_p1", value = oai_set_p1)
    text += "</td><td>"
    fields = create_drop_down_menu_content("SELECT distinct(code) from field")
    fields.append(['', '', ''])
    if (oai_set_f1):
        fields.append([oai_set_f1, 'selected', oai_set_f1])
    else:
        fields.append(['', 'selected', 'Field'])
    if (oai_set_m1):
        mode_dropdown_m1 = [[oai_set_m1, 'selected', modes[oai_set_m1]]]
    else:
        mode_dropdown_m1 = [['', 'selected', 'Mode']]
    text += drop_down_menu("oai_set_f1", fields)
    text += "</td><td>"
    text += drop_down_menu("oai_set_m1", mode_dropdown + mode_dropdown_m1)
    text += "</td><td>"
    if (oai_set_op1):
        mode_operators_1.append([oai_set_op1, 'selected', operators[oai_set_op1]])
    else:
        mode_operators_1.append(['', 'selected', 'Operators'])
    text += drop_down_menu("oai_set_op1", mode_operators_1)
    text += "</td></tr><tr><td>"
    # --- second phrase/field/mode row ---
    text += input_text(title = "Phrase:", name = "oai_set_p2", value = oai_set_p2)
    text += "</td><td>"
    fields = create_drop_down_menu_content("SELECT distinct(code) from field")
    fields.append(['', '', ''])
    if (oai_set_f2):
        fields.append([oai_set_f2, 'selected', oai_set_f2])
    else:
        fields.append(['', 'selected', 'Field'])
    if (oai_set_m2):
        mode_dropdown_m2 = [[oai_set_m2, 'selected', modes[oai_set_m2]]]
    else:
        mode_dropdown_m2 = [['', 'selected', 'Mode']]
    text += drop_down_menu("oai_set_f2", fields)
    text += "</td><td>"
    text += drop_down_menu("oai_set_m2", mode_dropdown + mode_dropdown_m2)
    text += "</td><td>"
    if (oai_set_op2):
        mode_operators_2.append([oai_set_op2, 'selected', operators[oai_set_op2]])
    else:
        mode_operators_2.append(['', 'selected', 'Operators'])
    text += drop_down_menu("oai_set_op2", mode_operators_2)
    text += "</td></tr><tr><td>"
    # --- third phrase/field/mode row (no trailing operator) ---
    text += input_text(title = "Phrase:", name = "oai_set_p3", value = oai_set_p3)
    text += "</td><td>"
    fields = create_drop_down_menu_content("SELECT distinct(code) from field")
    fields.append(['', '', ''])
    if (oai_set_f3):
        fields.append([oai_set_f3, 'selected', oai_set_f3])
    else:
        fields.append(['', 'selected', 'Field'])
    if (oai_set_m3):
        mode_dropdown_m3 = [[oai_set_m3, 'selected', modes[oai_set_m3]]]
    else:
        mode_dropdown_m3 = [['', 'selected', 'Mode']]
    text += drop_down_menu("oai_set_f3", fields)
    text += "</td><td>"
    text += drop_down_menu("oai_set_m3", mode_dropdown + mode_dropdown_m3)
    text += "</td></tr></table>"
    return text
def check_user(req, role, authorized=0):
    """Check whether *req* is allowed to perform *role*.

    @param req: request object passed to the access control engine
    @param role: action name to authorize
    @param authorized: when truthy, skip the authorization check
    @return: ("", message) when allowed, ("false", message) otherwise
    """
    auth_code, auth_message = access_manager.acc_authorize_action(req, role)
    if auth_code != 0 and not authorized:
        return ("false", auth_message)
    return ("", auth_message)
def transform_tuple(header, tuple, start='', end=''):
    """Render a sequence of rows as an admin-styled HTML table.

    @param header: list of column header labels
    @param tuple: sequence of rows; each row is either a scalar
        (int/long/str/dict -> rendered as a single cell) or an iterable
        of cell values. NOTE(review): the parameter shadows the builtin
        'tuple'; kept because callers pass it by keyword.
    @param start: HTML prepended before the table
    @param end: HTML appended after the table
    @return: the table markup
    """
    # alignment classes are derived from the first row's cell types:
    # numbers right-aligned, everything else left-aligned
    align = []
    try:
        firstrow = tuple[0]
        if type(firstrow) in [int, long]:
            align = ['admintdright']
        elif type(firstrow) in [str, dict]:
            align = ['admintdleft']
        else:
            for item in firstrow:
                if type(item) is int:
                    align.append('admintdright')
                else:
                    align.append('admintdleft')
    except IndexError:
        # empty input: render headers only
        firstrow = []
    tblstr = ''
    for h in header:
        tblstr += ' <th class="adminheader">%s</th>\n' % (h, )
    if tblstr: tblstr = ' <tr>\n%s\n </tr>\n' % (tblstr, )
    tblstr = start + '<table class="admin_wvar_nomargin">\n' + tblstr
    # first data row (no zebra striping)
    try:
        extra = '<tr>'
        if type(firstrow) not in [int, long, str, dict]:
            for i in range(len(firstrow)): extra += '<td class="%s">%s</td>\n' % (align[i], firstrow[i])
        else:
            extra += ' <td class="%s">%s</td>\n' % (align[0], firstrow)
        #extra += '<td rowspan="%s" style="vertical-align: top">\n%s\n</td>\n</tr>\n' % (len(tuple), extracolumn)
        extra += '</tr>\n'
    except IndexError:
        extra = ''
    tblstr += extra
    # remaining rows, zebra-striped on every other row
    j = 1
    for row in tuple[1:]:
        style = ''
        if j % 2:
            style = ' style="background-color: rgb(235, 247, 255);"'
        j += 1
        tblstr += ' <tr%s>\n' % style
        if type(row) not in [int, long, str, dict]:
            for i in range(len(row)): tblstr += '<td class="%s" style="padding:5px 10px;">%s</td>\n' % (align[i], row[i])
        else:
            tblstr += ' <td class="%s" style="padding:5px 10px;">%s</td>\n' % (align[0], row)
        tblstr += ' </tr> \n'
    tblstr += '</table> \n '
    tblstr += end
    return tblstr
def nice_box(header='', content='', cls="admin_wvar"):
    """
    Embed the content into a box with given header

    Parameters:
        header - *str* header of the box
     datalist - *str* the content of the box
          cls - *str* the class of the box
    """
    return '''
    <table class="%(cls)s" width="95%%">
     <thead>
      <tr>
       <th class="adminheaderleft" colspan="1">%(header)s</th>
      </tr>
     </thead>
     <tbody>
      <tr>
       <td style="vertical-align: top; margin-top: 5px; width: 100%%;">
        %(content)s
       </td>
      </tr>
     </tbody>
    </table>
    ''' % {'cls': cls, 'header': header, 'content': content}
def extended_input_form(action="", text="", button="func", cnfrm='',
                        **hidden):
    """Like createform(), but lays the submit button out in a table
    cell to the right of the form content.

    @param action: form action URL
    @param text: HTML snippet placed in the left cell
    @param button: label of the submit button
    @param cnfrm: when truthy, add a 'func' confirmation checkbox
    @param hidden: extra hidden inputs; a list value yields one hidden
        input per list element
    @return: the form markup
    """
    chunks = ['<form action="%s" method="post">\n' % (action, ),
              '<table>\n<tr><td style="vertical-align: top">',
              text]
    if cnfrm:
        chunks.append(' <input type="checkbox" name="func" value="1"/>')
    for key, val in hidden.items():
        values = val if type(val) is list else [val]
        for value in values:
            chunks.append(' <input type="hidden" name="%s" value="%s"/>\n' % (key, value))
    chunks.append('</td><td style="vertical-align: bottom">')
    chunks.append(' <input class="adminbutton" type="submit" value="%s"/>\n' % (button, ))
    chunks.append('</td></tr></table>')
    chunks.append('</form>\n')
    return ''.join(chunks)
diff --git a/invenio/legacy/oairepository/server.py b/invenio/legacy/oairepository/server.py
index f6a2509f4..8f0c7bb1a 100644
--- a/invenio/legacy/oairepository/server.py
+++ b/invenio/legacy/oairepository/server.py
@@ -1,901 +1,901 @@
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Receive OAI-PMH 2.0 requests and responds"""
__revision__ = "$Id$"
import cPickle
import os
import re
import time
import datetime
import tempfile
import sys
if sys.hexversion < 0x2050000:
from glob import glob as iglob
else:
from glob import iglob
from invenio.config import \
CFG_OAI_DELETED_POLICY, \
CFG_OAI_EXPIRE, \
CFG_OAI_IDENTIFY_DESCRIPTION, \
CFG_OAI_ID_FIELD, \
CFG_OAI_LOAD, \
CFG_OAI_SET_FIELD, \
CFG_OAI_PREVIOUS_SET_FIELD, \
CFG_OAI_METADATA_FORMATS, \
CFG_CACHEDIR, \
CFG_SITE_NAME, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_SITE_URL, \
CFG_WEBSTYLE_HTTP_USE_COMPRESSION, \
CFG_CERN_SITE, \
CFG_OAI_SAMPLE_IDENTIFIER, \
CFG_OAI_ID_PREFIX, \
CFG_OAI_FRIENDS, \
CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \
CFG_OAI_PROVENANCE_BASEURL_SUBFIELD, \
CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD, \
CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD, \
CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD, \
CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD, \
CFG_OAI_PROVENANCE_ALTERED_SUBFIELD
from invenio.intbitset import intbitset
from invenio.htmlutils import X, EscapedXMLString
from invenio.dbquery import run_sql, wash_table_column_name
from invenio.search_engine import record_exists, get_all_restricted_recids, get_all_field_values, search_unit_in_bibxxx, get_record
from invenio.modules.formatter import format_record
from invenio.legacy.bibrecord import record_get_field_instances
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.oai_repository_config import CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
# Arguments accepted by each OAI-PMH 2.0 verb ('verb' itself excluded);
# consulted by check_argd() to reject illegal or missing arguments.
CFG_VERBS = {
    'GetRecord'          : ['identifier', 'metadataPrefix'],
    'Identify'           : [],
    'ListIdentifiers'    : ['from', 'until',
                            'metadataPrefix',
                            'set',
                            'resumptionToken'],
    'ListMetadataFormats': ['identifier'],
    'ListRecords'        : ['from', 'until',
                            'metadataPrefix',
                            'set',
                            'resumptionToken'],
    'ListSets'           : ['resumptionToken']
    }

# Default message for each OAI-PMH error code; oai_error() appends any
# request-specific detail after this default text.
CFG_ERRORS = {
    "badArgument": "The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax:",
    "badResumptionToken": "The value of the resumptionToken argument is invalid or expired:",
    "badVerb": "Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated:",
    "cannotDisseminateFormat": "The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository:",
    "idDoesNotExist": "The value of the identifier argument is unknown or illegal in this repository:",
    "noRecordsMatch": "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list:",
    "noMetadataFormats": "There are no metadata formats available for the specified item:",
    "noSetHierarchy": "The repository does not support sets:"
    }
def oai_error(argd, errors):
    """
    Return a well-formatted OAI-PMH error response.

    @param argd: dict of washed request arguments; echoed back in the
        <request> tag only when no badArgument/badVerb error occurred
    @param errors: list of (error_code, error_msg) tuples; error_code must
        be a key of CFG_ERRORS, error_msg may be None to use the default
    @return: the complete XML error document as a string
    """
    out = """<?xml version="1.0" encoding="UTF-8"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">"""
    out += X.responseDate()(get_utc_now())
    for error_code, error_msg in errors:
        assert(error_code in CFG_ERRORS)
        if error_code in ("badArgument", "badVerb"):
            # The protocol forbids echoing possibly malformed arguments
            # back to the client in these two cases.
            out += X.request()(oai_get_request_url())
            break
    else:
        ## There are no badArgument or badVerb errors so we can
        ## return the whole request information
        out += X.request(**argd)(oai_get_request_url())
    for error_code, error_msg in errors:
        if error_msg is None:
            error_msg = CFG_ERRORS[error_code]
        else:
            error_msg = "%s %s" % (CFG_ERRORS[error_code], error_msg)
        out += X.error(code=error_code)(error_msg)
    out += "</OAI-PMH>"
    return out
def oai_header(argd, verb):
    """
    Return the opening part of an OAI-PMH response: XML declaration,
    stylesheet hint, root element, responseDate, request tag and -- when
    a verb is given -- the opening verb element.
    """
    parts = [
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "\n",
        "<?xml-stylesheet type=\"text/xsl\" href=\"%s/css/oai2.xsl.v1.0\" ?>\n" % CFG_SITE_URL,
        "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n",
        X.responseDate()(get_utc_now()),
    ]
    if verb:
        # Echo the request arguments and open the verb container.
        parts.append(X.request(**argd)(oai_get_request_url()))
        parts.append("<%s>\n" % verb)
    else:
        parts.append(X.request()(oai_get_request_url()))
    return "".join(parts)
def oai_footer(verb):
    """
    @return: the OAI footer closing the verb container (when given) and
        the OAI-PMH root element.
    """
    closing = "</%s>\n" % (verb) if verb else ""
    return closing + "</OAI-PMH>\n"
def get_field(recid, field):
    """
    Return the list of values of MARC field `field` for the record with
    system number `recid` (joins the bibXXx value table with its
    bibrec link table).
    """
    tag_digits = field[0:2]
    value_table = "bib%sx" % tag_digits
    link_table = "bibrec_bib%sx" % tag_digits
    # Table names are validated through wash_table_column_name(); the
    # recid/tag values go through bound query parameters.
    query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s" % (wash_table_column_name(value_table), wash_table_column_name(link_table))
    rows = run_sql(query, (recid, field))
    return [value for (value,) in rows]
def utc_to_localtime(date):
"""
Convert UTC to localtime
Reference:
- (1) http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates
- (2) http://www.w3.org/TR/NOTE-datetime
This function works only with dates complying with the
"Complete date plus hours, minutes and seconds" profile of
ISO 8601 defined by (2), and linked from (1).
Eg: 1994-11-05T13:15:30Z
"""
ldate = date.split("T")[0]
ltime = date.split("T")[1]
lhour = ltime.split(":")[0]
lminute = ltime.split(":")[1]
lsec = ltime.split(":")[2]
lsec = lsec[:-1] # Remove trailing "Z"
lyear = ldate.split("-")[0]
lmonth = ldate.split("-")[1]
lday = ldate.split("-")[2]
# 1: Build a time as UTC. Since time.mktime() expect a local time :
## 1a: build it without knownledge of dst
## 1b: substract timezone to get a local time, with possibly wrong dst
utc_time = time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, -1))
local_time = utc_time - time.timezone
# 2: Fix dst for local_time
# Find out the offset for daily saving time of the local
# timezone at the time of the given 'date'
if time.localtime(local_time)[-1] == 1:
local_time = local_time + 3600
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(local_time))
def localtime_to_utc(date):
"""Convert localtime to UTC"""
ldate = date.split(" ")[0]
ltime = date.split(" ")[1]
lhour = ltime.split(":")[0]
lminute = ltime.split(":")[1]
lsec = ltime.split(":")[2]
lyear = ldate.split("-")[0]
lmonth = ldate.split("-")[1]
lday = ldate.split("-")[2]
# Find out the offset for daily saving time of the local
# timezone at the time of the given 'date'
#
# 1: build time that correspond to local date, without knowledge of dst
# 2: determine if dst is locally enabled at this time
tmp_date = time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, -1))
if time.localtime(tmp_date)[-1] == 1:
dst = time.localtime(tmp_date)[-1]
else:
dst = 0
# 3: Build a new time with knowledge of the dst
local_time = time.mktime((int(lyear), int(lmonth), int(lday), int(lhour), int(lminute), int(lsec), 0, 0, dst))
# 4: Get the time as UTC
utc_time = time.gmtime(local_time)
return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_time)
def get_modification_date(recid):
    """Returns the date of last modification for the record 'recid'.

    @param recid: record system number
    @return: modification date as a UTC ISO 8601 string, or "" when the
        record does not exist or has a NULL modification date
    """
    out = ""
    res = run_sql("SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec WHERE id=%s", (recid,), 1)
    # Check the value too, not only that a row came back: the column may
    # be NULL.
    if res and res[0][0]:
        out = localtime_to_utc(res[0][0])
    return out
def get_earliest_datestamp():
    """Get earliest datestamp in the database.

    @return: earliest creation date as a UTC ISO 8601 string, or "" when
        the bibrec table is empty.
    """
    out = ""
    res = run_sql("SELECT DATE_FORMAT(MIN(creation_date),'%Y-%m-%d %H:%i:%s') FROM bibrec", n=1)
    # MIN() over an empty table still returns one row containing NULL, so
    # the value itself must be checked as well (mirrors
    # get_modification_date()); otherwise localtime_to_utc(None) crashes.
    if res and res[0][0]:
        out = localtime_to_utc(res[0][0])
    return out
def get_latest_datestamp():
    """Get latest datestamp in the database.

    @return: latest modification date as a UTC ISO 8601 string, or ""
        when the bibrec table is empty.
    """
    out = ""
    res = run_sql("SELECT DATE_FORMAT(MAX(modification_date),'%Y-%m-%d %H:%i:%s') FROM bibrec", n=1)
    # MAX() over an empty table still returns one row containing NULL, so
    # the value itself must be checked as well (mirrors
    # get_modification_date()); otherwise localtime_to_utc(None) crashes.
    if res and res[0][0]:
        out = localtime_to_utc(res[0][0])
    return out
def check_date(date):
    """Check if given date has a correct format, complying to "Complete date" or
    "Complete date plus hours, minutes and seconds" formats defined in ISO8601.

    @param date: candidate datestamp string
    @return: the date unchanged when well-formed, "" otherwise
    """
    # Raw string literal: "\d" in a plain literal is an invalid escape
    # (DeprecationWarning, and a SyntaxError in future Python versions).
    if re.match(r"\d\d\d\d-\d\d-\d\d(T\d\d:\d\d:\d\dZ)?\Z", date) is not None:
        return date
    else:
        return ""
def normalize_date(date, dtime="T00:00:00Z"):
    """
    Normalize `date` to the ISO 8601 "Complete date plus hours, minutes
    and seconds" profile: a bare 10-char "YYYY-MM-DD" gets `dtime`
    appended, a full 20-char value is returned unchanged, anything else
    maps to "". `date` must have been validated with check_date() first.
    """
    size = len(date)
    if size == 10:
        return date + dtime
    return date if size == 20 else ""
def get_record_provenance(recid):
    """
    Return the provenance XML representation of a record, suitable to be put
    in the about tag.

    Reads the instances of the external-OAI-id MARC field and maps each
    configured subfield code onto the corresponding OAI provenance
    element. Instances without a harvesting baseURL produce no output.
    """
    record = get_record(recid)
    provenances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4])
    out = ""
    for provenance in provenances:
        base_url = identifier = datestamp = metadata_namespace = origin_description = harvest_date = altered = ""
        # provenance[0] is the subfield list of the field instance.
        for (code, value) in provenance[0]:
            if code == CFG_OAI_PROVENANCE_BASEURL_SUBFIELD:
                base_url = value
            elif code == CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]:
                identifier = value
            elif code == CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD:
                datestamp = value
            elif code == CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD:
                metadata_namespace = value
            elif code == CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD:
                origin_description = value
            elif code == CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD:
                harvest_date = value
            elif code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD:
                altered = value
        if base_url:
            out += """<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">"""
            out += X.originDescription(harvestDate=harvest_date, altered=altered)(
                X.baseURL()(base_url),
                X.identifier()(identifier),
                X.datestamp()(datestamp),
                X.metadataNamespace()(metadata_namespace),
                origin_description and X.originDescription(origin_description) or '' ## This is already XML
            )
            out += """</provenance>"""
    return out
def get_record_rights(dummy):
    """
    Return the record rights parts, suitable to be put in the about tag.

    Currently a stub that always returns "" (the argument is unused);
    the commented-out draft below sketches a possible implementation.
    """
    return ""
    ## FIXME: This need to be thought in a good way. What shall we really
    ## put in the rights parts?
    #record = get_record(recid)
    #rights = record_get_field_instances(record, CFG_OAI_RIGHTS_FIELD[:3], CFG_OAI_RIGHTS_FIELD[3], CFG_OAI_RIGHTS_FIELD[4])
    #license = record_get_field_instances(record, CFG_OAI_LICENSE_FIELD[:3], CFG_OAI_LICENSE_FIELD[3], CFG_OAI_LICENSE_FIELD[4])
    #holder = date = rights_uri = contact = statement = terms = publisher = license_uri = ''
    #if rights:
        #for code, value in rights[0][0]:
            #if code == CFG_OAI_RIGHTS_HOLDER_SUBFIELD:
                #holder = value
            #elif code == CFG_OAI_RIGHTS_DATE_SUBFIELD:
                #date = value
            #elif code == CFG_OAI_RIGHTS_URI_SUBFIELD:
                #rights_uri = value
            #elif code == CFG_OAI_RIGHTS_CONTACT_SUBFIELD:
                #contact = value
            #elif CFG_OAI_RIGHTS_STATEMENT_SUBFIELD:
                #statement = value
    #if license:
        #for code, value in license[0][0]:
            #if code == CFG_OAI_LICENSE_TERMS_SUBFIELD:
                #terms = value
            #elif code == CFG_OAI_LICENSE_PUBLISHER_SUBFIELD:
                #publisher = value
            #elif code == CFG_OAI_LICENSE_URI_SUBFIELD:
                #license_uri = value
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None, set_last_updated=None):
    """Prints record 'recid' formatted according to 'prefix'.

    - if record does not exist, return nothing.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    @param verb: only 'ListIdentifiers' changes behavior (header-only
        output); any other value yields the full record
    @param set_spec: when given, a record outside this set (and all of
        its subsets) is treated as deleted/absent
    @param set_last_updated: optional UTC datestamp of the set; the
        advertised datestamp is the later of it and the record's own
        modification date
    """
    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and not set_spec in sets and not [set_ for set_ in sets if set_.startswith("%s:" % set_spec)]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False
    if record_exists_result:
        status = None
    else:
        status = 'deleted'
    if not record_exists_result and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return ""
    idents = get_field(recid, CFG_OAI_ID_FIELD)
    if not idents:
        # No OAI identifier stored for this record: nothing to expose.
        return ""
    ## FIXME: Move these checks in a bibtask
    #try:
        #assert idents, "No OAI ID for record %s, please do your checks!" % recid
    #except AssertionError, err:
        #register_exception(alert_admin=True)
        #return ""
    #try:
        #assert len(idents) == 1, "More than OAI ID found for recid %s. Considering only the first one, but please do your checks: %s" % (recid, idents)
    #except AssertionError, err:
        #register_exception(alert_admin=True)
    ident = idents[0]
    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)
    if set_last_updated:
        # Advertise the later of the record's modification date and the
        # set's last update (string comparison of ISO datestamps).
        header_body += X.datestamp()(max(get_modification_date(recid), set_last_updated))
    else:
        header_body += X.datestamp()(get_modification_date(recid))
    for set_spec in get_field(recid, CFG_OAI_SET_FIELD):
        if set_spec and set_spec != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            # Print only if field not empty
            header_body += X.setSpec()(set_spec)
    header = X.header(status=status)(header_body)
    if verb == 'ListIdentifiers':
        return header
    else:
        if record_exists_result:
            metadata_body = format_record(recid, CFG_OAI_METADATA_FORMATS[prefix][0])
            metadata = X.metadata(body=metadata_body)
            provenance_body = get_record_provenance(recid)
            if provenance_body:
                provenance = X.about(body=provenance_body)
            else:
                provenance = ''
            rights_body = get_record_rights(recid)
            if rights_body:
                rights = X.about(body=rights_body)
            else:
                rights = ''
        else:
            # Deleted record: header only, no metadata/about sections.
            metadata = ''
            provenance = ''
            rights = ''
        return X.record()(header, metadata, provenance, rights)
def oai_list_metadata_formats(argd):
    """Generates response to oai_list_metadata_formats verb.

    When an 'identifier' argument is present, first verify that the
    record exists (honouring CFG_OAI_DELETED_POLICY); then list every
    format configured in CFG_OAI_METADATA_FORMATS.
    """
    if argd.get('identifier'):
        recid = oai_get_recid(argd['identifier'])
        _record_exists = record_exists(recid)
        if _record_exists != 1 and (_record_exists != -1 or CFG_OAI_DELETED_POLICY == "no"):
            return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd['identifier'])])
    out = ""
    for prefix, (dummy, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        # NOTE(review): the inner tags pass the value positionally
        # (X.metadataPrefix(prefix)) instead of the X.tag()(value) form
        # used elsewhere in this module -- confirm against
        # invenio.htmlutils.X that both spellings emit the same markup.
        out += X.metadataFormat()(
            X.metadataPrefix(prefix),
            X.schema(schema),
            X.metadataNamespace(namespace)
        )
    return oai_header(argd, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
def oai_list_records_or_identifiers(req, argd):
    """Generates response to oai_list_records verb.

    Serves both ListRecords and ListIdentifiers (the actual verb is read
    from argd['verb']). The response is written incrementally to `req`
    and paginated CFG_OAI_LOAD records at a time; pagination state is
    pickled on disk keyed by a resumption token.
    """
    verb = argd['verb']
    resumption_token_was_specified = False
    # check if the resumption_token did not expire
    if argd.get('resumptionToken'):
        resumption_token_was_specified = True
        try:
            cache = oai_cache_load(argd['resumptionToken'])
            last_recid = cache['last_recid']
            argd = cache['argd']
            complete_list = cache['complete_list']
            complete_list = filter_out_based_on_date_range(complete_list, argd.get('from', ''), argd.get('until', ''))
        except Exception:
            # Any failure (missing file, bad pickle, ...) is reported to
            # the client as an expired/invalid token.
            register_exception(alert_admin=True)
            req.write(oai_error(argd, [("badResumptionToken", "ResumptionToken expired or invalid: %s" % argd['resumptionToken'])]))
            return
    else:
        last_recid = 0
        complete_list = oai_get_recid_list(argd.get('set', ""), argd.get('from', ""), argd.get('until', ""))
        if not complete_list: # noRecordsMatch error
            req.write(oai_error(argd, [("noRecordsMatch", "no records correspond to the request")]))
            return
    cursor = 0
    for cursor, recid in enumerate(complete_list):
        ## Let's fast-forward the cursor to point after the last recid that was
        ## disseminated successfully
        if recid > last_recid:
            break
    set_last_updated = get_set_last_update(argd.get('set', ""))
    req.write(oai_header(argd, verb))
    for recid in list(complete_list)[cursor:cursor+CFG_OAI_LOAD]:
        req.write(print_record(recid, argd['metadataPrefix'], verb=verb, set_spec=argd.get('set'), set_last_updated=set_last_updated))
    if list(complete_list)[cursor+CFG_OAI_LOAD:]:
        # More records remain: persist the harvest state and emit a
        # resumption token pointing at the next page.
        resumption_token = oai_generate_resumption_token(argd.get('set', ''))
        cache = {
            'argd': argd,
            'last_recid': recid,
            'complete_list': complete_list.fastdump(),
        }
        oai_cache_dump(resumption_token, cache)
        expdate = oai_get_response_date(CFG_OAI_EXPIRE)
        req.write(X.resumptionToken(expirationDate=expdate, cursor=cursor, completeListSize=len(complete_list))(resumption_token))
    elif resumption_token_was_specified:
        ## Since a resumptionToken was used we shall put a last empty resumptionToken
        req.write(X.resumptionToken(cursor=cursor, completeListSize=len(complete_list))(""))
    req.write(oai_footer(verb))
    oai_cache_gc()
def oai_list_sets(argd):
    """
    Lists available sets for OAI metadata harvesting.

    No flow control (resumption tokens) is applied to ListSets.
    """
    sets = get_all_sets().values()
    if not sets:
        return oai_error(argd, [("noSetHierarchy", "No sets have been configured for this repository")])
    chunks = []
    for current in sets:
        # current is a (setSpec, setName, setDescription) triple.
        chunks.append(" <set>\n")
        chunks.append(X.setSpec()(current[0]) + X.setName()(current[1]))
        if current[2]:
            chunks.append(X.setDescription()(current[2]))
        chunks.append(" </set>\n")
    body = "".join(chunks)
    return oai_header(argd, "ListSets") + body + oai_footer("ListSets")
def oai_get_record(argd):
    """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.

    - if record does not exist, return oai_error 'idDoesNotExist'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.
    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return oai_error 'idDoesNotExist'.
    """
    recid = oai_get_recid(argd['identifier'])
    _record_exists = record_exists(recid)
    if _record_exists == 1 or \
           (_record_exists == -1 and CFG_OAI_DELETED_POLICY != 'no'):
        # The third print_record() parameter is `verb`; the previous code
        # passed `_record_exists` (an int) there by position, which only
        # worked because print_record() merely compares verb against
        # 'ListIdentifiers'. Pass the real verb explicitly.
        out = print_record(recid, argd['metadataPrefix'], verb='GetRecord')
        out = oai_header(argd, "GetRecord") + out + oai_footer("GetRecord")
    else:
        return oai_error(argd, [("idDoesNotExist", "invalid record Identifier: %s" % argd['identifier'])])
    return out
def oai_identify(argd):
    """Generates a response to oai_identify verb.

    Assembles repository name, base URL, protocol version, admin email,
    earliest datestamp, deleted-record policy, granularity, optional
    compression hint, the oai-identifier description block, the
    site-configured description and the optional "friends" list.
    """
    out = X.repositoryName()(CFG_SITE_NAME)
    out += X.baseURL()(CFG_SITE_URL + '/oai2d')
    out += X.protocolVersion()("2.0")
    out += X.adminEmail()(CFG_SITE_SUPPORT_EMAIL)
    out += X.earliestDatestamp()(get_earliest_datestamp())
    out += X.deletedRecord()(CFG_OAI_DELETED_POLICY)
    out += X.granularity()("YYYY-MM-DDThh:mm:ssZ")
    if CFG_WEBSTYLE_HTTP_USE_COMPRESSION:
        out += X.compression()('deflate')
    # Machine-readable repository identifier (oai-identifier schema).
    out += X.description("""<oai-identifier xmlns="http://www.openarchives.org/OAI/2.0/oai-identifier"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai-identifier
http://www.openarchives.org/OAI/2.0/oai-identifier.xsd">""" +
        X.scheme()("oai") +
        X.repositoryIdentifier()(CFG_OAI_ID_PREFIX) +
        X.delimiter()(":") +
        X.sampleIdentifier()(CFG_OAI_SAMPLE_IDENTIFIER) +
        """</oai-identifier>""")
    out += CFG_OAI_IDENTIFY_DESCRIPTION % {'CFG_SITE_URL': EscapedXMLString(CFG_SITE_URL)}
    if CFG_OAI_FRIENDS:
        # Optional "friends" container pointing at related repositories.
        friends = """<friends xmlns="http://www.openarchives.org/OAI/2.0/friends/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/friends/
http://www.openarchives.org/OAI/2.0/friends.xsd">"""
        for baseurl in CFG_OAI_FRIENDS:
            friends += X.baseURL()(baseurl)
        friends += """</friends>"""
        out += X.description(friends)
    out = oai_header(argd, "Identify") + out + oai_footer("Identify")
    return out
def get_utc_now():
    """
    Return current UTC time in the OAI-PMH format.
    """
    now = datetime.datetime.utcnow()
    return now.strftime('%Y-%m-%dT%H:%M:%SZ')
def oai_build_request_element(argd=None):
    """
    Build the responseDate and request tags for an OAI response.
    """
    # Avoid a mutable default: normalize None to an empty attribute dict.
    attributes = {} if argd is None else argd
    request_tag = X.request(**attributes)("%s/oai2d" % CFG_SITE_URL)
    return X.responseDate()(get_utc_now()) + request_tag
def oai_get_request_url():
    """Generates requesturl tag for OAI (the base URL of the endpoint)."""
    return CFG_SITE_URL + "/oai2d"
def oai_get_response_date(delay=0):
    """Generates responseDate tag for OAI: now plus `delay` seconds, UTC."""
    when = time.gmtime(time.time() + delay)
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", when)
def oai_get_recid(identifier):
    """Returns the first database BIB ID for the OAI identifier 'identifier', if it exists."""
    if not identifier:
        return None
    tag_digits = CFG_OAI_ID_FIELD[0:2]
    query = "SELECT DISTINCT(bb.id_bibrec) FROM bib%sx AS bx, bibrec_bib%sx AS bb WHERE bx.tag=%%s AND bb.id_bibxxx=bx.id AND bx.value=%%s" % (tag_digits, tag_digits)
    recid = None
    # Keep the last matching row, as the original loop did.
    for row in run_sql(query, (CFG_OAI_ID_FIELD, identifier)):
        recid = row[0]
    return recid
def get_set_last_update(set_spec=""):
    """
    Returns the last_update of a given set (or of all sets) in UTC

    @param set_spec: setSpec to look up; "" means the maximum across all sets
    @return: UTC ISO 8601 string, or None when no set carries a last_updated
    """
    if set_spec:
        last_update = run_sql("SELECT DATE_FORMAT(MAX(last_updated),'%%Y-%%m-%%d %%H:%%i:%%s') FROM oaiREPOSITORY WHERE setSpec=%s", (set_spec, ))[0][0]
    else:
        last_update = run_sql("SELECT DATE_FORMAT(MAX(last_updated),'%Y-%m-%d %H:%i:%s') FROM oaiREPOSITORY")[0][0]
    if last_update:
        return localtime_to_utc(last_update)
    else:
        return None
def filter_out_based_on_date_range(recids, fromdate="", untildate="", set_spec=None):
    """ Filter out recids based on date range.

    Client-supplied bounds are UTC ISO 8601 and are converted to
    localtime before comparison with bibrec modification dates.
    Restricted records are always removed from the result.

    @param recids: iterable of record ids (cloned, not mutated)
    @param set_spec: when not None, the set's own last_updated may widen
        the lower bound (see note below)
    @return: an intbitset of the surviving record ids
    """
    if fromdate:
        fromdate = normalize_date(fromdate, "T00:00:00Z")
    else:
        fromdate = get_earliest_datestamp()
    fromdate = utc_to_localtime(fromdate)
    if untildate:
        untildate = normalize_date(untildate, "T23:59:59Z")
    else:
        untildate = get_latest_datestamp()
    untildate = utc_to_localtime(untildate)
    if set_spec is not None: ## either it has a value or it empty, thus meaning all records
        last_updated = get_set_last_update(set_spec)
        if last_updated is not None:
            last_updated = utc_to_localtime(last_updated)
            # NOTE(review): when the set was modified after `fromdate`,
            # the lower bound is reset to the earliest datestamp so that
            # records (re)exposed by the set change are not missed by
            # incremental harvesters -- confirm this is the intended
            # semantics.
            if last_updated > fromdate:
                fromdate = utc_to_localtime(get_earliest_datestamp())
    recids = intbitset(recids) ## Let's clone :-)
    if fromdate and untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s", (fromdate, untildate)))
    elif fromdate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date >= %s", (fromdate, )))
    elif untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date <= %s", (untildate, )))
    return recids - get_all_restricted_recids()
def oai_get_recid_list(set_spec="", fromdate="", untildate=""):
    """
    Returns list of recids for the OAI set 'set', modified from 'fromdate' until 'untildate'.

    @param set_spec: setSpec; "" selects every record carrying any OAI set
    @param fromdate: lower UTC datestamp bound (inclusive) or ""
    @param untildate: upper UTC datestamp bound (inclusive) or ""
    @return: an intbitset of record ids
    """
    ret = intbitset()
    if not set_spec:
        ret |= search_unit_in_bibxxx(p='*', f=CFG_OAI_SET_FIELD, type='e')
        if CFG_OAI_DELETED_POLICY != 'no':
            # Records that used to belong to a set remain harvestable
            # when a deleted-record policy is in force.
            ret |= search_unit_in_bibxxx(p='*', f=CFG_OAI_PREVIOUS_SET_FIELD, type='e')
    else:
        # The set itself plus all of its subsets ("<set>:<subset>").
        ret |= search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_SET_FIELD, type='e')
        ret |= search_unit_in_bibxxx(p='%s:*' % set_spec, f=CFG_OAI_SET_FIELD, type='e')
        if CFG_OAI_DELETED_POLICY != 'no':
            ret |= search_unit_in_bibxxx(p=set_spec, f=CFG_OAI_PREVIOUS_SET_FIELD, type='e')
            ret |= search_unit_in_bibxxx(p='%s:*' % set_spec, f=CFG_OAI_PREVIOUS_SET_FIELD, type='e')
    if CFG_OAI_DELETED_POLICY == 'no':
        # Deleted records must never surface when the policy is 'no'.
        ret -= search_unit_in_bibxxx(p='DELETED', f='980__%', type='e')
        if CFG_CERN_SITE:
            ret -= search_unit_in_bibxxx(p='DUMMY', f='980__%', type='e')
    return filter_out_based_on_date_range(ret, fromdate, untildate, set_spec)
def oai_generate_resumption_token(set_spec):
    """Generates unique ID for resumption token management.

    Creates an empty file named "<set_spec>___<random>" in
    CFG_CACHEDIR/RTdata (the file later receives the pickled harvest
    state) and returns its basename as the token.
    """
    fd, name = tempfile.mkstemp(dir=os.path.join(CFG_CACHEDIR, 'RTdata'), prefix='%s___' % set_spec)
    os.close(fd)
    return os.path.basename(name)
def oai_delete_resumption_tokens_for_set(set_spec):
    """
    In case a set is modified by the admin interface, this will delete
    any resumption token that is now invalid.

    Token files are named "<setSpec>___<random>"; a change to a set also
    invalidates the tokens of each ancestor set (the prefix before every
    ":") and of the no-set harvest ("___*" files).
    """
    aset = set_spec
    while aset:
        # Fixed two defects of the previous version: it globbed on
        # `set_spec` instead of `aset` (ancestor tokens never removed),
        # and `aset.rsplit(":", 1)[0]` is a no-op once no ":" remains,
        # so the while-loop never terminated.
        for name in iglob(os.path.join(CFG_CACHEDIR, 'RTdata', '%s___*' % aset)):
            os.remove(name)
        if ':' not in aset:
            break
        aset = aset.rsplit(":", 1)[0]
    for name in iglob(os.path.join(CFG_CACHEDIR, 'RTdata', '___*')):
        os.remove(name)
def oai_cache_dump(resumption_token, cache):
    """
    Given a resumption_token and the cache, stores the cache.

    The file is opened in binary mode, as required by pickle protocol -1
    (highest, binary), and the handle is closed explicitly -- the
    previous version leaked it.
    """
    filename = os.path.join(CFG_CACHEDIR, 'RTdata', resumption_token)
    stream = open(filename, 'wb')
    try:
        cPickle.dump(cache, stream, -1)
    finally:
        stream.close()
def oai_cache_load(resumption_token):
    """
    Restores the cache from the resumption_token.

    @raise ValueError: when the token escapes the RTdata directory
        (path-traversal guard kept from the original).
    """
    fullpath = os.path.join(CFG_CACHEDIR, 'RTdata', resumption_token)
    if os.path.dirname(os.path.abspath(fullpath)) != os.path.abspath(os.path.join(CFG_CACHEDIR, 'RTdata')):
        raise ValueError("Invalid path")
    # Binary mode matches oai_cache_dump() (pickle protocol -1); close
    # the handle explicitly -- the previous version leaked it.
    stream = open(fullpath, 'rb')
    try:
        return cPickle.load(stream)
    finally:
        stream.close()
def oai_cache_gc():
    """
    OAI Cache Garbage Collector.

    Removes resumption-token files under CFG_CACHEDIR/RTdata that have
    not been modified for more than CFG_OAI_EXPIRE seconds.
    """
    for file_ in os.listdir(os.path.join(CFG_CACHEDIR, 'RTdata')):
        filename = os.path.join(os.path.join(CFG_CACHEDIR, 'RTdata', file_))
        # cache entry expires when not modified during a specified period of time
        if ((time.time() - os.path.getmtime(filename)) > CFG_OAI_EXPIRE):
            try:
                os.remove(filename)
            # Python 2 except syntax; `e` is intentionally unused.
            except OSError, e:
                # Most probably the cache was already deleted
                pass
def get_all_sets():
    """
    Return all the sets.

    @return: dict mapping setSpec -> (setSpec, setName, setDescription),
        merging the configured oaiREPOSITORY rows with every setSpec
        found in the records, plus all implicit supersets; the global
        set and the empty setSpec are removed.
    """
    res = run_sql("SELECT setSpec, setName, setDescription FROM oaiREPOSITORY")
    ret = {}
    for row in res:
        ret[row[0]] = row
    ## Let's expand with all the set that exist in the DB
    for a_set in get_all_field_values(CFG_OAI_SET_FIELD):
        if a_set not in ret:
            ret[a_set] = (a_set, a_set, '')
    ## Let's expand with all the supersets
    for a_set in ret.keys():
        while ':' in a_set:
            try:
                a_set = a_set.rsplit(":", 1)[0]
            except AttributeError:
                # NOTE(review): str.rsplit exists since Python 2.4, so
                # this fallback looks unreachable here -- confirm before
                # removing.
                a_set = ':'.join(a_set.split(":")[:-1])
            if a_set not in ret:
                ret[a_set] = (a_set, a_set, '')
    if CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC in ret:
        ## Let's remove the special global set
        del ret[CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC]
    if '' in ret:
        ## '' is not a valid setSpec but might be in the MARC
        del ret['']
    return ret
def check_argd(argd):
    """
    Check OAI arguments
    Also transform them from lists to strings.

    @param argd: dict mapping argument name -> list of received values
        (lists allow detecting repeated arguments); mutated in place so
        that each value becomes a plain string afterwards
    @return: list of (error_code, error_message) tuples; empty when the
        request is valid
    """
    errors = []
    ## no several times the same argument
    bad_arguments_error = False
    for param, value in argd.iteritems():
        if len(value) > 1 and not bad_arguments_error:
            errors.append(("badArgument", "More than one value specified for the %s argument: %s" % (param, value)))
            bad_arguments_error = True ## This is needed only once
        # Collapse the list to its first value (or '' when absent).
        if len(value) > 0:
            argd[param] = value[0]
        else:
            argd[param] = ''
    ## principal argument required
    if argd['verb'] not in CFG_VERBS:
        errors.append(("badVerb", "Illegal OAI verb: %s" % argd['verb']))
    ## defined argd
    for param in argd.keys():
        if not param in CFG_VERBS.get(argd['verb'], []) and param != 'verb' \
               and not bad_arguments_error:
            errors.append(("badArgument", "The request includes illegal arguments for the given verb: %s" % param))
            bad_arguments_error = True
            break # Indicate only once
    ## resumptionToken exclusive
    if argd.get('resumptionToken', '') != "" and \
           len(argd.keys()) != 2 and not bad_arguments_error:
        errors.append(("badArgument", "The resumptionToken was specified together with other arguments"))
        bad_arguments_error = True
    if argd.get('resumptionToken', None) == '':
        errors.append(("badResumptionToken", "ResumptionToken invalid: %s" % argd.get('resumptionToken', None)))
    ## datestamp formats
    if argd.has_key('from') and \
           'from' in CFG_VERBS.get(argd['verb'], []):
        from_length = len(argd['from'])
        if check_date(argd['from']) == "":
            errors.append(("badArgument", "Bad datestamp format in from: %s" % argd['from']))
    else:
        from_length = 0
    if argd.has_key('until') and \
           'until' in CFG_VERBS.get(argd['verb'], []):
        until_length = len(argd['until'])
        if check_date(argd['until']) == "":
            errors.append(("badArgument", "Bad datestamp format in until: %s" % argd['until']))
    else:
        until_length = 0
    # Both bounds, when present, must use the same granularity (date-only
    # vs. full datetime), which the lengths encode.
    if from_length != 0:
        if until_length != 0:
            if from_length != until_length:
                errors.append(("badArgument", "From and until have two different formats: %s Vs. %s" % (from_length, until_length)))
    if argd.has_key('from') and argd.has_key('until') \
           and argd['from'] > argd['until'] and \
           'from' in CFG_VERBS.get(argd['verb'], []) and \
           'until' in CFG_VERBS.get(argd['verb'], []):
        errors.append(("badArgument", "from argument comes after until argument: %s > %s" % (argd['from'], argd['until'])))
    ## Identify exclusive
    if argd['verb'] == "Identify" and \
           len(argd.keys()) != 1:
        if not bad_arguments_error: # Do not repeat this error
            errors.append(("badArgument", "The request includes illegal arguments"))
            bad_arguments_error = True
    ## parameters for GetRecord
    if argd['verb'] == "GetRecord" and \
           not argd.has_key('identifier'):
        errors.append(("badArgument", "Record identifier missing"))
    if argd['verb'] == "GetRecord" and \
           not argd.has_key('metadataPrefix'):
        errors.append(("badArgument", "Missing metadataPrefix"))
    ## parameters for ListRecords and ListIdentifiers
    if (argd['verb'] == "ListRecords" or argd['verb'] == "ListIdentifiers") and \
           (not argd.has_key('resumptionToken') and not argd.has_key('metadataPrefix')):
        errors.append(("badArgument", "Missing metadataPrefix"))
    ## Metadata prefix defined and valid
    if argd.has_key('metadataPrefix') and \
           not argd['metadataPrefix'] in CFG_OAI_METADATA_FORMATS:
        errors.append(("cannotDisseminateFormat", "Chosen format is not supported. Valid formats are: %s" % ', '.join(CFG_OAI_METADATA_FORMATS.keys())))
    return errors
def oai_profile():
    """
    Runs a benchmark

    Exercises ListRecords (oai_dc and marcxml) and ListIdentifiers,
    writing responses into throwaway in-memory buffers; driven by the
    profiler in the __main__ block below.
    """
    from cStringIO import StringIO
    oai_list_records_or_identifiers(StringIO(), argd={"metadataPrefix": "oai_dc", "verb": "ListRecords"})
    oai_list_records_or_identifiers(StringIO(), argd={"metadataPrefix": "marcxml", "verb" :"ListRecords"})
    oai_list_records_or_identifiers(StringIO(), argd={"metadataPrefix": "oai_dc", "verb": "ListIdentifiers"})
    return
if __name__ == "__main__":
    # Profile the OAI responder and print cumulative timings; the raw
    # stats are stored in a file named "oai_profile".
    import profile
    import pstats
    profile.run('oai_profile()', "oai_profile")
    p = pstats.Stats("oai_profile")
    p.strip_dirs().sort_stats("cumulative").print_stats()
diff --git a/invenio/legacy/oairepository/webinterface.py b/invenio/legacy/oairepository/webinterface.py
index 302f7c67b..8ec7f6b47 100644
--- a/invenio/legacy/oairepository/webinterface.py
+++ b/invenio/legacy/oairepository/webinterface.py
@@ -1,148 +1,148 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Invenio OAI provider interface, compliant with OAI-PMH/2.0"""
__revision__ = "$Id$"
import os
import time
import cStringIO
from invenio.utils import apache
from invenio import oai_repository_server
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_CACHEDIR, CFG_OAI_SLEEP, CFG_DEVEL_SITE, \
CFG_ETCDIR
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
# Validate outgoing OAI-PMH responses against the official XML Schema,
# but only on development sites and only when lxml is importable.
CFG_VALIDATE_RESPONSES = False
OAI_PMH_VALIDATOR = None
if CFG_DEVEL_SITE:
    try:
        from lxml import etree
        # NOTE(review): only ImportError is caught -- a missing or
        # unreadable OAI-PMH.xsd under CFG_ETCDIR would raise an IOError
        # here; confirm the file always ships with the package.
        OAI_PMH_VALIDATOR = etree.XMLSchema(etree.parse(open(os.path.join(CFG_ETCDIR, 'oairepository', 'OAI-PMH.xsd'))))
        CFG_VALIDATE_RESPONSES = True
    except ImportError:
        pass
class WebInterfaceOAIProviderPages(WebInterfaceDirectory):
    """Defines the set of /oai2d OAI provider pages."""
    _exports = ['']

    def __call__(self, req, form):
        """OAI repository interface"""
        # Clean input arguments. The protocol specifies that an error
        # has to be returned if the same argument is specified several
        # times. Eg:
        # oai2d?verb=ListIdentifiers&metadataPrefix=marcxml&metadataPrefix=marcxml
        # So keep the arguments as list for now so that check_argd can
        # return an error if needed (check_argd also transforms these
        # lists into strings)
        argd = wash_urlargd(form, {'verb': (list, []),
                                   'metadataPrefix': (list, []),
                                   'from': (list, []),
                                   'until': (list, []),
                                   'set': (list, []),
                                   'identifier': (list, []),
                                   'resumptionToken': (list, []),
                                   })
        if CFG_VALIDATE_RESPONSES:
            # record everything written to req so the complete response can
            # be schema-validated below
            req.track_writings = True
        ## wash_urlargd(..) function cleaned everything, but also added
        ## unwanted parameters. Remove them now
        for param in argd.keys():
            if not param in form and param != 'verb':
                del argd[param]
        ## wash_urlargd(..) function also removed unknown parameters
        ## that we would like to keep in order to send back an error
        ## as required by the protocol. But we do not need that value,
        ## so set it to empty string.
        for param in form.keys():
            if param not in argd.keys():
                argd[param] = ''
        ## But still remove 'ln' parameter that was automatically added.
        if argd.has_key('ln'):
            del argd['ln']
        ## check request for OAI compliancy
        ## also transform all the list arguments into string
        oai_errors = oai_repository_server.check_argd(argd)
        ## check availability (OAI requests for Identify, ListSets and
        ## ListMetadataFormats are served immediately, otherwise we
        ## shall wait for CFG_OAI_SLEEP seconds between requests):
        # rate limiting uses the mtime of the RTdata file as the timestamp
        # of the last served request
        if os.path.exists("%s/RTdata/RTdata" % CFG_CACHEDIR) and (argd['verb'] not in ["Identify", "ListMetadataFormats", "ListSets"] and not argd.get('resumptionToken')):
            time_gap = int(time.time() - os.path.getmtime("%s/RTdata/RTdata" % CFG_CACHEDIR))
            if(time_gap < CFG_OAI_SLEEP):
                # too soon: ask the harvester to retry later (HTTP 503)
                req.headers_out["Status-Code"] = "503"
                req.headers_out["Retry-After"] = "%d" % (CFG_OAI_SLEEP - time_gap)
                req.status = apache.HTTP_SERVICE_UNAVAILABLE
                return "Retry after %d seconds" % (CFG_OAI_SLEEP - time_gap)
            # refresh the rate-limit timestamp
            command = "touch %s/RTdata/RTdata" % CFG_CACHEDIR
            os.system(command)
        ## create OAI response
        req.content_type = "text/xml"
        req.send_http_header()
        if not oai_errors:
            ## OAI Identify
            if argd['verb'] == "Identify":
                req.write(oai_repository_server.oai_identify(argd))
            ## OAI ListSets
            elif argd['verb'] == "ListSets":
                req.write(oai_repository_server.oai_list_sets(argd))
            ## OAI ListIdentifiers or OAI ListRecords
            elif argd['verb'] in ("ListIdentifiers", "ListRecords"):
                oai_repository_server.oai_list_records_or_identifiers(req, argd)
            ## OAI GetRecord
            elif argd['verb'] == "GetRecord":
                req.write(oai_repository_server.oai_get_record(argd))
            ## OAI ListMetadataFormats
            elif argd['verb'] == "ListMetadataFormats":
                req.write(oai_repository_server.oai_list_metadata_formats(argd))
            ## Unknown verb
        ## OAI error
        else:
            req.write(oai_repository_server.oai_error(argd, oai_errors))
        if CFG_VALIDATE_RESPONSES:
            req.track_writings = False
            try:
                OAI_PMH_VALIDATOR.assertValid(etree.parse(cStringIO.StringIO(req.what_was_written)))
            except etree.DocumentInvalid:
                # invalid response: alert the admins with full context,
                # then propagate the error
                register_exception(req=req, alert_admin=True)
                raise
        return "\n"

    ## Return the same page wether we ask for /oai2d?verb or /oai2d/?verb
    index = __call__
diff --git a/invenio/legacy/search_engine/__init__.py b/invenio/legacy/search_engine/__init__.py
index 0480d0df8..d32ca40e5 100644
--- a/invenio/legacy/search_engine/__init__.py
+++ b/invenio/legacy/search_engine/__init__.py
@@ -1,6687 +1,6687 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=C0301
"""Invenio Search Engine in mod_python."""
__lastupdated__ = """$Date$"""
__revision__ = "$Id$"
## import general modules:
import cgi
import cStringIO
import copy
import string
import os
import re
import time
import urllib
import urlparse
import zlib
import sys
try:
## import optional module:
import numpy
CFG_NUMPY_IMPORTABLE = True
except:
CFG_NUMPY_IMPORTABLE = False
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
## import Invenio stuff:
from invenio.base.globals import cfg
from invenio.config import \
CFG_CERN_SITE, \
CFG_INSPIRE_SITE, \
CFG_OAI_ID_FIELD, \
CFG_WEBCOMMENT_ALLOW_REVIEWS, \
CFG_WEBSEARCH_CALL_BIBFORMAT, \
CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \
CFG_WEBSEARCH_FIELDS_CONVERT, \
CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \
CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \
CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT, \
CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \
CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \
CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \
CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \
CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS, \
CFG_WEBSEARCH_WILDCARD_LIMIT, \
CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \
CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \
CFG_WEBSEARCH_SYNONYM_KBRS, \
CFG_SITE_LANG, \
CFG_SITE_NAME, \
CFG_LOGDIR, \
CFG_BIBFORMAT_HIDDEN_TAGS, \
CFG_SITE_URL, \
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \
CFG_SOLR_URL, \
CFG_WEBSEARCH_DETAILED_META_FORMAT, \
CFG_SITE_RECORD, \
CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT, \
CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY, \
CFG_BIBSORT_BUCKETS, \
CFG_XAPIAN_ENABLED, \
CFG_BIBINDEX_CHARS_PUNCTUATION
from invenio.modules.search.errors import \
InvenioWebSearchUnknownCollectionError, \
InvenioWebSearchWildcardLimitError
from invenio.search_engine_utils import get_fieldvalues, get_fieldvalues_alephseq_like
from invenio.legacy.bibrecord import create_record, record_xml_output
from invenio.bibrank_record_sorter import get_bibrank_methods, is_method_valid, rank_records as rank_records_bibrank
from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list
from invenio.bibindex_engine_stemmer import stem
from invenio.bibindex_tokenizers.BibIndexDefaultTokenizer import BibIndexDefaultTokenizer
from invenio.bibindex_tokenizers.BibIndexCJKTokenizer import BibIndexCJKTokenizer, is_there_any_CJK_character_in_text
from invenio.bibindex_engine_utils import author_name_requires_phrase_search
from invenio.bibindex_engine_washer import wash_index_term, lower_index_term, wash_author_name
from invenio.bibindex_engine_config import CFG_BIBINDEX_SYNONYM_MATCH_TYPE
from invenio.bibindex_engine_utils import get_idx_indexer
from invenio.modules.formatter import format_record, format_records, get_output_format_content_type, create_excel
from invenio.modules.formatter.config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT
from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box
from invenio.bibknowledge import get_kbr_values
from invenio.data_cacher import DataCacher
from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search
from invenio.access_control_admin import acc_get_action_id
from invenio.access_control_config import VIEWRESTRCOLL, \
CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS, \
CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS
from invenio.websearchadminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts
from invenio.intbitset import intbitset
from invenio.dbquery import DatabaseError, deserialize_via_marshal, InvenioDbQueryWildcardLimitError
from invenio.access_control_engine import acc_authorize_action
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.utils.text import encode_for_xml, wash_for_utf8, strip_accents
from invenio.htmlutils import get_mathjax_header
from invenio.htmlutils import nmtoken_from_string
import invenio.template
webstyle_templates = invenio.template.load('webstyle')
webcomment_templates = invenio.template.load('webcomment')
from invenio.bibrank_citation_searcher import calculate_cited_by_list, \
calculate_co_cited_with_list, get_records_with_num_cites, get_self_cited_by, \
get_refersto_hitset, get_citedby_hitset
from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box
from invenio.dbquery import run_sql, run_sql_with_limit, wash_table_column_name, \
get_table_update_time
from invenio.webuser import getUid, collect_user_info, session_param_set
from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box, write_warning
from invenio.base.i18n import gettext_set_language
from invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \
SpiresToInvenioSyntaxConverter
from invenio.utils import apache
from invenio.solrutils_bibindex_searcher import solr_get_bitset
from invenio.xapianutils_bibindex_searcher import xapian_get_bitset
try:
import invenio.template
websearch_templates = invenio.template.load('websearch')
except:
pass
from invenio.websearch_external_collections import calculate_hosted_collections_results, do_calculate_hosted_collections_results
from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH
from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH
from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS
# id of the 'viewrestrcoll' access-control action, resolved once at import
VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL)
## global vars:
cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection
cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical?
## precompile some often-used regexp for speed reasons:
re_word = re.compile('[\s]')
re_quotes = re.compile('[\'\"]')
re_doublequote = re.compile('\"')
# boolean operators written out as words (case-insensitive)
re_logical_and = re.compile('\sand\s', re.I)
re_logical_or = re.compile('\sor\s', re.I)
re_logical_not = re.compile('\snot\s', re.I)
# symbolic set operators surrounded by whitespace
re_operators = re.compile(r'\s([\+\-\|])\s')
re_pattern_wildcards_after_spaces = re.compile(r'(\s)[\*\%]+')
# quoted phrases and regexp patterns inside a query string
re_pattern_single_quotes = re.compile("'(.*?)'")
re_pattern_double_quotes = re.compile("\"(.*?)\"")
re_pattern_parens_quotes = re.compile(r'[\'\"]{1}[^\'\"]*(\([^\'\"]*\))[^\'\"]*[\'\"]{1}')
re_pattern_regexp_quotes = re.compile("\/(.*?)\/")
re_pattern_spaces_after_colon = re.compile(r'(:\s+)')
re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+')
re_pattern_space = re.compile("__SPACE__")
re_pattern_today = re.compile("\$TODAY\$")
re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)')
# punctuation char (per BibIndex config) immediately followed by a space
re_punctuation_followed_by_space = re.compile(CFG_BIBINDEX_CHARS_PUNCTUATION + '\s')
## em possible values
# Maps the human-readable names accepted by the 'em' URL argument to the
# short codes used internally to select which page elements to display.
EM_REPOSITORY={"body" : "B",
               "header" : "H",
               "footer" : "F",
               "search_box" : "S",
               "see_also_box" : "L",
               "basket" : "K",
               "alert" : "A",
               "search_info" : "I",
               "overview" : "O",
               "all_portalboxes" : "P",
               "te_portalbox" : "Pte",
               "tp_portalbox" : "Ptp",
               "np_portalbox" : "Pnp",
               "ne_portalbox" : "Pne",
               "lt_portalbox" : "Plt",
               "rt_portalbox" : "Prt"};
class RestrictedCollectionDataCacher(DataCacher):
    """Cache of the names of all restricted collections, i.e. collections
    that carry a 'viewrestrcoll' authorization rule."""

    def __init__(self):
        def cache_filler():
            # Collect every collection name bound to the viewrestrcoll
            # action in the access-control tables.
            try:
                rows = run_sql("""SELECT DISTINCT ar.value
                    FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id
                    WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (VIEWRESTRCOLL_ID,), run_on_slave=True)
            except Exception:
                # database problems, return empty cache
                return []
            return [row[0] for row in rows]

        def timestamp_verifier():
            # Refresh whenever either of the ACL tables changes.
            return max(get_table_update_time('accROLE_accACTION_accARGUMENT'),
                       get_table_update_time('accARGUMENT'))

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
def collection_restricted_p(collection, recreate_cache_if_needed=True):
    """Return True when `collection` is one of the restricted collections."""
    cache_obj = restricted_collection_cache
    if recreate_cache_if_needed:
        cache_obj.recreate_cache_if_needed()
    return collection in cache_obj.cache
# Create the module-level cache singleton once per process; probing the
# existing instance first keeps an already-built cache alive across
# module reloads.
try:
    restricted_collection_cache.is_ok_p
except Exception:
    restricted_collection_cache = RestrictedCollectionDataCacher()
def ziplist(*lists):
    """Transpose the given sequences like zip(), but produce lists
    instead of tuples.

    Example:
    zip([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) =>
    [(f1, p1, op1), (f2, p2, op2), (f3, p3, '')]
    ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) =>
    [[f1, p1, op1], [f2, p2, op2], [f3, p3, '']]

    FIXME: This is handy to have, and should live somewhere else, like
    miscutil.really_useful_functions or something.
    XXX: Starting in python 2.6, the same can be achieved (faster) by
    using itertools.izip_longest(); when the minimum recommended Python
    is bumped, we should use that instead.
    """
    def _row_as_list(*row):
        return list(row)
    return map(_row_as_list, *lists)
def get_permitted_restricted_collections(user_info, recreate_cache_if_needed=True):
    """Return the list of restricted collections that this user is
    nevertheless authorized to view."""
    if recreate_cache_if_needed:
        restricted_collection_cache.recreate_cache_if_needed()
    # auth code 0 means access granted
    return [coll for coll in restricted_collection_cache.cache
            if acc_authorize_action(user_info, 'viewrestrcoll', collection=coll)[0] == 0]
def get_all_restricted_recids():
    """
    Return the set of all restricted recids, i.e. the ids of records that
    belong to at least one restricted collection.
    """
    all_restricted = intbitset()
    for coll_name in restricted_collection_cache.cache:
        all_restricted = all_restricted | get_collection_reclist(coll_name)
    return all_restricted
def get_restricted_collections_for_recid(recid, recreate_cache_if_needed=True):
    """
    Return the names of the restricted collections containing recid.
    """
    if recreate_cache_if_needed:
        restricted_collection_cache.recreate_cache_if_needed()
        collection_reclist_cache.recreate_cache_if_needed()
    matching = []
    for coll in restricted_collection_cache.cache:
        if recid in get_collection_reclist(coll, recreate_cache_if_needed=False):
            matching.append(coll)
    return matching
def is_user_owner_of_record(user_info, recid):
    """
    Check if the user is owner of the record, i.e. he is the submitter
    and/or belongs to a owner-like group authorized to 'see' the record.

    @param user_info: the user_info dictionary that describe the user.
    @type user_info: user_info dictionary
    @param recid: the record identifier.
    @type recid: positive integer
    @return: True if the user is 'owner' of the record; False otherwise
    @rtype: bool
    """
    candidates = []
    # gather every email/group value stored in the owner-granting tags
    for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS:
        candidates.extend(get_fieldvalues(recid, tag))
    user_email = user_info['email'].strip().lower()
    for candidate in candidates:
        # candidate may name a group the user belongs to...
        if candidate in user_info['group']:
            return True
        # ...or match the user's own email address
        if candidate.strip().lower() == user_email:
            return True
    return False
###FIXME: This method needs to be refactorized
def is_user_viewer_of_record(user_info, recid):
    """
    Check if the user is allow to view the record based in the marc tags
    inside CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS
    i.e. his email is inside the 506__m tag or he is inside an e-group listed
    in the 506__m tag

    @param user_info: the user_info dictionary that describe the user.
    @type user_info: user_info dictionary
    @param recid: the record identifier.
    @type recid: positive integer
    @return: True if the user is 'allow to view' the record; False otherwise
    @rtype: bool
    """
    allowed = []
    # gather every email/group value stored in the viewer-granting tags
    for tag in CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS:
        allowed.extend(get_fieldvalues(recid, tag))
    user_email = user_info['email'].strip().lower()
    for entry in allowed:
        # entry may be a group name the user belongs to, or his email
        if entry in user_info['group'] or entry.strip().lower() == user_email:
            return True
    return False
def check_user_can_view_record(user_info, recid):
    """
    Check if the user is authorized to view the given recid. The function
    grants access in two cases: either user has author rights on this
    record, or he has view rights to the primary collection this record
    belongs to.

    @param user_info: the user_info dictionary that describe the user.
    @type user_info: user_info dictionary
    @param recid: the record identifier.
    @type recid: positive integer
    @return: (0, ''), when authorization is granted, (>0, 'message') when
    authorization is not granted
    @rtype: (int, string)
    """
    # policy 'ANY': any one authorized restricted collection grants access;
    # otherwise the user must be authorized to all of them (see loop below)
    policy = CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY.strip().upper()
    if isinstance(recid, str):
        recid = int(recid)
    ## At this point, either webcoll has not yet run or there are some
    ## restricted collections. Let's see first if the user own the record.
    if is_user_owner_of_record(user_info, recid):
        ## Perfect! It's authorized then!
        return (0, '')
    if is_user_viewer_of_record(user_info, recid):
        ## Perfect! It's authorized then!
        return (0, '')
    restricted_collections = get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False)
    if not restricted_collections and record_public_p(recid):
        ## The record is public and not part of any restricted collection
        return (0, '')
    if restricted_collections:
        ## If there are restricted collections the user must be authorized to all/any of them (depending on the policy)
        auth_code, auth_msg = 0, ''
        for collection in restricted_collections:
            (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collection)
            if auth_code and policy != 'ANY':
                ## Ouch! the user is not authorized to this collection
                return (auth_code, auth_msg)
            elif auth_code == 0 and policy == 'ANY':
                ## Good! At least one collection is authorized
                return (0, '')
        ## Depending on the policy, the user will be either authorized or not
        return auth_code, auth_msg
    if is_record_in_any_collection(recid, recreate_cache_if_needed=False):
        ## the record is not in any restricted collection
        return (0, '')
    elif record_exists(recid) > 0:
        ## We are in the case where webcoll has not run.
        ## Let's authorize SUPERADMIN
        (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=None)
        if auth_code == 0:
            return (0, '')
        else:
            ## Too bad. Let's print a nice message:
            return (1, """The record you are trying to access has just been
submitted to the system and needs to be assigned to the
proper collections. It is currently restricted for security reasons
until the assignment will be fully completed. Please come back later to
properly access this record.""")
    else:
        ## The record either does not exists or has been deleted.
        ## Let's handle these situations outside of this code.
        return (0, '')
class IndexStemmingDataCacher(DataCacher):
    """
    Cache mapping index id -> configured stemming language.
    Not meant for direct use; call get_index_stemming_language() instead.
    """

    def __init__(self):
        def cache_filler():
            try:
                rows = run_sql("""SELECT id, stemming_language FROM idxINDEX""")
            except DatabaseError:
                # database problems, return empty cache
                return {}
            return dict(rows)

        def timestamp_verifier():
            # cache is stale whenever the index table changes
            return get_table_update_time('idxINDEX')

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
# Create the module-level cache singleton once; probing the existing
# instance first keeps an already-built cache alive across reloads.
try:
    index_stemming_cache.is_ok_p
except Exception:
    index_stemming_cache = IndexStemmingDataCacher()
def get_index_stemming_language(index_id, recreate_cache_if_needed=True):
    """Return the stemming language configured for the given index id."""
    cache_obj = index_stemming_cache
    if recreate_cache_if_needed:
        cache_obj.recreate_cache_if_needed()
    return cache_obj.cache[index_id]
class FieldTokenizerDataCacher(DataCacher):
    """
    Cache mapping logical field code -> tokenizer of the index covering it.
    Not meant for direct use; call get_field_tokenizer_type() instead.
    """

    def __init__(self):
        def cache_filler():
            try:
                rows = run_sql("""SELECT fld.code, ind.tokenizer FROM idxINDEX AS ind, field AS fld, idxINDEX_field AS indfld WHERE ind.id = indfld.id_idxINDEX AND indfld.id_field = fld.id""")
            except DatabaseError:
                # database problems, return empty cache
                return {}
            return dict(rows)

        def timestamp_verifier():
            # cache is stale whenever the index table changes
            return get_table_update_time('idxINDEX')

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
# Create the module-level cache singleton once; probing the existing
# instance first keeps an already-built cache alive across reloads.
try:
    field_tokenizer_cache.is_ok_p
except Exception:
    field_tokenizer_cache = FieldTokenizerDataCacher()
def get_field_tokenizer_type(field_name, recreate_cache_if_needed=True):
    """Return the tokenizer configured for the index covering `field_name`,
    or None when the field is not associated with any index."""
    if recreate_cache_if_needed:
        field_tokenizer_cache.recreate_cache_if_needed()
    # dict.get returns None for unknown fields, matching the old
    # try/except KeyError behaviour
    return field_tokenizer_cache.cache.get(field_name)
class CollectionRecListDataCacher(DataCacher):
    """
    Cache of per-collection record-id hitsets.  The reclists themselves are
    filled lazily by get_collection_reclist(); do not use this class
    directly.
    """

    def __init__(self):
        def cache_filler():
            try:
                names = run_sql("SELECT name FROM collection")
            except Exception:
                # database problems, return empty cache
                return {}
            # map every collection name to None; the actual reclist is
            # computed on demand by get_collection_reclist(coll)
            return dict((row[0], None) for row in names)

        def timestamp_verifier():
            # cache is stale whenever the collection table changes
            return get_table_update_time('collection')

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
# Module-level singleton; re-created whenever the previous instance is
# missing or reports itself as not OK.
try:
    if not collection_reclist_cache.is_ok_p:
        raise Exception
except Exception:
    collection_reclist_cache = CollectionRecListDataCacher()
def get_collection_reclist(coll, recreate_cache_if_needed=True):
    """Return hitset of recIDs that belong to the collection 'coll'.

    The serialized reclist is read from the collection table on first
    access and memoized in collection_reclist_cache for later calls.
    """
    if recreate_cache_if_needed:
        collection_reclist_cache.recreate_cache_if_needed()
    if coll not in collection_reclist_cache.cache:
        return intbitset() # collection does not exist; return empty set
    if not collection_reclist_cache.cache[coll]:
        # collection's reclist not in the cache yet, so calculate it
        # and fill the cache:
        reclist = intbitset()
        query = "SELECT nbrecs,reclist FROM collection WHERE name=%s"
        res = run_sql(query, (coll, ), 1)
        if res:
            try:
                # res[0][1] holds the serialized bitset blob
                reclist = intbitset(res[0][1])
            except:
                # unreadable/NULL serialized reclist: keep the empty set
                pass
        collection_reclist_cache.cache[coll] = reclist
    # finally, return reclist:
    return collection_reclist_cache.cache[coll]
def get_available_output_formats(visible_only=False):
    """
    Return the list of available output formats as dictionaries of the
    form {'value': code, 'text': name}.  When visible_only is True, only
    output formats whose visibility flag is set to 1 are returned.
    """
    sql = "SELECT code,name FROM format"
    if visible_only:
        sql += " WHERE visibility='1'"
    sql += " ORDER BY name ASC"
    rows = run_sql(sql)
    if not rows:
        # nothing defined in the DB: fall back to the HTML brief format
        return [{'value': 'hb', 'text': "HTML brief"}]
    return [{'value': code, 'text': name} for code, name in rows]
# Flask cache for search results.
from invenio.modules.search.cache import search_results_cache, get_search_results_cache_key
class CollectionI18nNameDataCacher(DataCacher):
    """
    Cache of translated ("long name") collection names, keyed by
    collection and language.  Not meant for direct use; call
    get_coll_i18nname() instead.
    """

    def __init__(self):
        def cache_filler():
            try:
                rows = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name
            except Exception:
                # database problems
                return {}
            names = {}
            for coll, lang, translated in rows:
                if translated:
                    # nested dict: collection name -> {language -> label}
                    names.setdefault(coll, {})[lang] = translated
            return names

        def timestamp_verifier():
            return get_table_update_time('collectionname')

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
# Module-level singleton; re-created whenever the previous instance is
# missing or reports itself as not OK.
try:
    if not collection_i18nname_cache.is_ok_p:
        raise Exception
except Exception:
    collection_i18nname_cache = CollectionI18nNameDataCacher()
def get_coll_i18nname(c, ln=CFG_SITE_LANG, verify_cache_timestamp=True):
    """
    Return the translated long name of collection C in language LN,
    falling back to the internal name when no translation exists.

    By default the collection-name cache is refreshed first, which costs
    a DB table update-time check per call.  Pass
    verify_cache_timestamp=False to skip that check (useful when calling
    this in a loop, e.g. while building search-results collection lists).
    """
    if verify_cache_timestamp:
        collection_i18nname_cache.recreate_cache_if_needed()
    try:
        return collection_i18nname_cache.cache[c][ln]
    except KeyError:
        # no translation in LN: return the internal collection name
        return c
class FieldI18nNameDataCacher(DataCacher):
    """
    Cache of translated ("long name") field names, keyed by field and
    language.  Not meant for direct use; call get_field_i18nname()
    instead.
    """

    def __init__(self):
        def cache_filler():
            try:
                rows = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name
            except Exception:
                # database problems, return empty cache
                return {}
            names = {}
            for fld, lang, translated in rows:
                if translated:
                    # nested dict: field name -> {language -> label}
                    names.setdefault(fld, {})[lang] = translated
            return names

        def timestamp_verifier():
            return get_table_update_time('fieldname')

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
# Module-level singleton; re-created whenever the previous instance is
# missing or reports itself as not OK.
try:
    if not field_i18nname_cache.is_ok_p:
        raise Exception
except Exception:
    field_i18nname_cache = FieldI18nNameDataCacher()
def get_field_i18nname(f, ln=CFG_SITE_LANG, verify_cache_timestamp=True):
    """
    Return the translated long name of field F in language LN, falling
    back to the internal name when no translation exists.

    When verify_cache_timestamp is True the DB timestamp of the
    field-name cache is checked first and the cache refreshed if stale;
    pass False to skip the check (useful inside loops).
    """
    if verify_cache_timestamp:
        field_i18nname_cache.recreate_cache_if_needed()
    try:
        return field_i18nname_cache.cache[f][ln]
    except KeyError:
        # no translation in LN: return the internal field name
        return f
def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG):
    """Return an alphabetically ordered list of [collection name,
    printable collection name] pairs, suitable for create_search_box()."""
    entries = []
    for row in run_sql("SELECT name FROM collection ORDER BY name ASC"):
        name = row[0]
        # build a nice printable label, truncating long i18n names:
        full_label = get_coll_i18nname(name, ln, False)
        label = wash_index_term(full_label, 30, False)
        if label != full_label:
            label = label + "..."
        if level:
            # prefix with dashes to show nesting depth
            label = " " + level * '-' + " " + label
        entries.append([name, label])
    return entries
def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG):
    """Return a score-ordered list of [collection name, printable
    collection name] pairs for the collection tree rooted at `collid`,
    suitable for create_search_box()."""
    ordered = []
    rows = run_sql("""SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c
        WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC""", (collid, ))
    for name, son_id in rows:
        # build a nice printable label, truncating long i18n names:
        full_label = get_coll_i18nname(name, ln, False)
        label = wash_index_term(full_label, 30, False)
        if label != full_label:
            label = label + "..."
        if level:
            # prefix with dashes to show nesting depth
            label = " " + level * '-' + " " + label
        ordered.append([name, label])
        # depth-first: descend into this son's subtree right away
        ordered = ordered + get_nicely_ordered_collection_list(son_id, level + 1, ln=ln)
    return ordered
def get_index_id_from_field(field):
    """
    Return the id of the index named FIELD or, failing that, the id of
    the first index in which the logical field code FIELD is indexed.
    Return zero when no index is defined for this field.
    Example: field='author', output=4.
    """
    if not field:
        # empty string means the 'global' index (logical field 'anyfield')
        field = 'global'
    # direct match on the index name first:
    rows = run_sql("""SELECT id FROM idxINDEX WHERE name=%s""", (field,))
    if rows:
        return rows[0][0]
    # otherwise resolve through the logical-field mapping:
    rows = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f
        WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX
        LIMIT 1""", (field,))
    if rows:
        return rows[0][0]
    return 0
def get_words_from_pattern(pattern):
    """
    Return the list of unique whitespace-separated words from `pattern`,
    after removing trailing punctuation-like signs from words.
    """
    # clean trailing punctuation signs inside pattern
    pattern = re_punctuation_followed_by_space.sub(' ', pattern)
    words = {}
    # use str.split() instead of the deprecated string.split() function,
    # and plain dict assignment instead of a redundant has_key() guard;
    # the dict deduplicates words in O(1) each
    for word in pattern.split():
        words[word] = 1
    return words.keys()
def create_basic_search_units(req, p, f, m=None, of='hb'):
    """Splits search pattern and search field into a list of independently searchable units.
       - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where
          'operator' is set union (|), set intersection (+) or set exclusion (-);
          'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
          'field' is either a code like 'title' or MARC tag like '100__a';
          'type' is the search type ('w' for word file search, 'a' for access file search).
        - Optionally, the function accepts the match type argument 'm'.
          If it is set (e.g. from advanced search interface), then it
          performs this kind of matching.  If it is not set, then a guess is made.
          'm' can have values: 'a'='all of the words', 'o'='any of the words',
                               'p'='phrase/substring', 'r'='regular expression',
                               'e'='exact value'.
        - Warnings are printed on req (when not None) in case of HTML output formats.

       @param req: mod_python request (may be None when called from CLI)
       @param p: search pattern
       @param f: search field (index code or MARC tag)
       @param m: optional explicit matching type (see above)
       @param of: output format code; warnings are emitted only for 'h*' formats
       @return: list of [operator, pattern, field, type] units
    """
    opfts = [] # will hold (o,p,f,t,h) units
    # FIXME: quick hack for the journal index
    if f == 'journal':
        opfts.append(['+', p, f, 'w'])
        return opfts
    ## check arguments: is desired matching type set?
    if m:
        ## A - matching type is known; good!
        if m == 'e':
            # A1 - exact value:
            opfts.append(['+', p, f, 'a']) # '+' since we have only one unit
        elif m == 'p':
            # A2 - phrase/substring:
            opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit
        elif m == 'r':
            # A3 - regular expression:
            opfts.append(['+', p, f, 'r']) # '+' since we have only one unit
        elif m == 'a' or m == 'w':
            # A4 - all of the words:
            p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed
            for word in get_words_from_pattern(p):
                opfts.append(['+', word, f, 'w']) # '+' in all units
        elif m == 'o':
            # A5 - any of the words:
            p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed
            for word in get_words_from_pattern(p):
                if len(opfts)==0:
                    opfts.append(['+', word, f, 'w']) # '+' in the first unit
                else:
                    opfts.append(['|', word, f, 'w']) # '|' in further units
        else:
            # unknown matching type: warn (HTML formats only) and fall
            # back to a substring word search:
            if of.startswith("h"):
                write_warning("Matching type '%s' is not implemented yet." % cgi.escape(m), "Warning", req=req)
            opfts.append(['+', "%" + p + "%", f, 'w'])
    else:
        ## B - matching type is not known: let us try to determine it by some heuristics
        if f and p[0] == '"' and p[-1] == '"':
            ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search
            opfts.append(['+', p[1:-1], f, 'a'])
        elif f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor', 'authorityauthor') and author_name_requires_phrase_search(p):
            ## B1 - do we search in author, and does 'p' contain space/comma/dot/etc?
            ## => doing washed ACC search
            opfts.append(['+', p, f, 'a'])
        elif f and p[0] == "'" and p[-1] == "'":
            ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search
            opfts.append(['+', '%' + p[1:-1] + '%', f, 'a'])
        elif f and p[0] == "/" and p[-1] == "/":
            ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search
            opfts.append(['+', p[1:-1], f, 'r'])
        elif f and string.find(p, ',') >= 0:
            ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search
            opfts.append(['+', p, f, 'a'])
        elif f and str(f[0:2]).isdigit():
            ## B2 - does 'f' exist and starts by two digits? => doing ACC search
            opfts.append(['+', p, f, 'a'])
        else:
            ## B3 - doing WRD search, but maybe ACC too
            # search units are separated by spaces unless the space is within single or double quotes
            # so, let us replace temporarily any space within quotes by '__SPACE__'
            p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
            p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p)
            p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p)
            # and spaces after colon as well:
            p = re_pattern_spaces_after_colon.sub(lambda x: string.replace(x.group(1), ' ', '__SPACE__'), p)
            # wash argument: normalize textual boolean operators to +/|/-:
            p = re_logical_and.sub(" ", p)
            p = re_logical_or.sub(" |", p)
            p = re_logical_not.sub(" -", p)
            p = re_operators.sub(r' \1', p)
            for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item")
                pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' '
                # firstly, determine set operator
                if pi[0] == '+' or pi[0] == '-' or pi[0] == '|':
                    oi = pi[0]
                    pi = pi[1:]
                else:
                    # okay, there is no operator, so let us decide what to do by default
                    oi = '+' # by default we are doing set intersection...
                # secondly, determine search pattern and field:
                if string.find(pi, ":") > 0:
                    fi, pi = string.split(pi, ":", 1)
                    fi = wash_field(fi)
                    # test whether fi is a real index code or a MARC-tag defined code:
                    if fi in get_fieldcodes() or '00' <= fi[:2] <= '99':
                        pass
                    else:
                        # it is not, so join it back:
                        fi, pi = f, fi + ":" + pi
                else:
                    fi, pi = f, pi
                # wash 'fi' argument:
                fi = wash_field(fi)
                # wash 'pi' argument:
                pi = pi.strip() # strip eventual spaces
                if re_quotes.match(pi):
                    # B3a - quotes are found => do ACC search (phrase search)
                    if pi[0] == '"' and pi[-1] == '"':
                        pi = string.replace(pi, '"', '') # remove quote signs
                        opfts.append([oi, pi, fi, 'a'])
                    elif pi[0] == "'" and pi[-1] == "'":
                        pi = string.replace(pi, "'", "") # remove quote signs
                        opfts.append([oi, "%" + pi + "%", fi, 'a'])
                    else: # unbalanced quotes, so fall back to WRD query:
                        opfts.append([oi, pi, fi, 'w'])
                elif pi.startswith('/') and pi.endswith('/'):
                    # B3b - pi has slashes around => do regexp search
                    opfts.append([oi, pi[1:-1], fi, 'r'])
                elif fi and len(fi) > 1 and str(fi[0]).isdigit() and str(fi[1]).isdigit():
                    # B3c - fi exists and starts by two digits => do ACC search
                    opfts.append([oi, pi, fi, 'a'])
                elif fi and not get_index_id_from_field(fi) and get_field_name(fi):
                    # B3d - logical field fi exists but there is no WRD index for fi => try ACC search
                    opfts.append([oi, pi, fi, 'a'])
                else:
                    # B3e - general case => do WRD search
                    pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
                    for pii in get_words_from_pattern(pi):
                        opfts.append([oi, pii, fi, 'w'])
    ## sanity check:
    # NOTE(review): this loop deletes from opfts while iterating over
    # the original range, so indices shift after a deletion and the
    # bare except silently swallows the resulting IndexError; a unit
    # that follows a removed one may escape the check — TODO confirm
    # and rewrite as a filtering pass.
    for i in range(0, len(opfts)):
        try:
            pi = opfts[i][1]
            if pi == '*':
                if of.startswith("h"):
                    write_warning("Ignoring standalone wildcard word.", "Warning", req=req)
                del opfts[i]
            if pi == '' or pi == ' ':
                fi = opfts[i][2]
                if fi:
                    if of.startswith("h"):
                        write_warning("Ignoring empty <em>%s</em> search term." % fi, "Warning", req=req)
                    del opfts[i]
        except:
            pass
    ## replace old logical field names if applicable:
    if CFG_WEBSEARCH_FIELDS_CONVERT:
        opfts = [[o, p, wash_field(f), t] for o, p, f, t in opfts]
    ## return search units:
    return opfts
def page_start(req, of, cc, aas, ln, uid, title_message=None,
               description='', keywords='', recID=-1, tab='', p='', em=''):
    """
    Start page according to given output format.

    Sets the HTTP content type on REQ according to OF and, for HTML
    outputs, writes the page header (title, navtrail, RSS link,
    optional MathJax and Google-Scholar-style meta tags).

    @param title_message: title of the page, not escaped for HTML
    @param description: description of the page, not escaped for HTML
    @param keywords: keywords of the page, not escaped for HTML
    @param recID: record ID when displaying a single record, else -1
    @param tab: detailed-record tab name (only used for single records)
    @param em: CSV list of enabled modules; '' enables everything
    """
    _ = gettext_set_language(ln)
    if not req or isinstance(req, cStringIO.OutputType):
        return # we were called from CLI
    if not title_message:
        title_message = _("Search Results")
    content_type = get_output_format_content_type(of)
    if of.startswith('x'):
        if of == 'xr':
            # we are doing RSS output
            req.content_type = "application/rss+xml"
            req.send_http_header()
            req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
        else:
            # we are doing XML output:
            req.content_type = get_output_format_content_type(of, 'text/xml')
            req.send_http_header()
            req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
    elif of.startswith('t') or str(of[0:3]).isdigit():
        # we are doing plain text output:
        req.content_type = "text/plain"
        req.send_http_header()
    elif of == "intbitset":
        req.content_type = "application/octet-stream"
        req.send_http_header()
    elif of == "id":
        pass # nothing to do, we shall only return list of recIDs
    elif content_type == 'text/html':
        # we are doing HTML output:
        req.content_type = "text/html"
        req.send_http_header()
        if not description:
            description = "%s %s." % (cc, _("Search Results"))
        if not keywords:
            keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln, False), get_coll_i18nname(cc, ln, False))
        ## generate RSS URL:
        argd = {}
        if req.args:
            argd = cgi.parse_qs(req.args)
        rssurl = websearch_templates.build_rss_url(argd)
        ## add MathJax if displaying single records (FIXME: find
        ## eventual better place to this code)
        if of.lower() in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
            metaheaderadd = get_mathjax_header(req.is_https())
        else:
            metaheaderadd = ''
        # Add metadata in meta tags for Google scholar-esque harvesting...
        # only if we have a detailed meta format and we are looking at a
        # single record
        if (recID != -1 and CFG_WEBSEARCH_DETAILED_META_FORMAT):
            metaheaderadd += format_record(recID, \
                                           CFG_WEBSEARCH_DETAILED_META_FORMAT, \
                                           ln = ln)
        ## generate navtrail:
        navtrail = create_navtrail_links(cc, aas, ln)
        if navtrail != '':
            navtrail += ' &gt; '
        # NOTE(review): the sub-expression (of != '' or of.lower() != 'hd')
        # is always True (when of == '' the second operand is True);
        # presumably `and` was intended — confirm before changing,
        # the same expression recurs below.
        if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \
               recID != -1:
            # If we are not in information tab in HD format, customize
            # the nav. trail to have a link back to main record. (Due
            # to the way perform_request_search() works, hb
            # (lowercase) is equal to hd)
            navtrail += ' <a class="navtrail" href="%s/%s/%s">%s</a>' % \
                        (CFG_SITE_URL, CFG_SITE_RECORD, recID, cgi.escape(title_message))
            if (of != '' or of.lower() != 'hd') and of != 'hb':
                # Export
                format_name = of
                query = "SELECT name FROM format WHERE code=%s"
                res = run_sql(query, (of,))
                if res:
                    format_name = res[0][0]
                navtrail += ' &gt; ' + format_name
            else:
                # Discussion, citations, etc. tabs
                tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label']
                navtrail += ' &gt; ' + _(tab_label)
        else:
            navtrail += cgi.escape(title_message)
        if p:
            # we are serving search/browse results pages, so insert pattern:
            navtrail += ": " + cgi.escape(p)
            title_message = p + " - " + title_message
        body_css_classes = []
        if cc:
            # we know the collection, lets allow page styles based on cc
            #collection names may not satisfy rules for css classes which
            #are something like:  -?[_a-zA-Z]+[_a-zA-Z0-9-]*
            #however it isn't clear what we should do about cases with
            #numbers, so we leave them to fail. Everything else becomes "_"
            css = nmtoken_from_string(cc).replace('.','_').replace('-','_').replace(':','_')
            body_css_classes.append(css)
        ## finally, print page header:
        if em == '' or EM_REPOSITORY["header"] in em:
            req.write(pageheaderonly(req=req, title=title_message,
                                     navtrail=navtrail,
                                     description=description,
                                     keywords=keywords,
                                     metaheaderadd=metaheaderadd,
                                     uid=uid,
                                     language=ln,
                                     navmenuid='search',
                                     navtrail_append_title_p=0,
                                     rssurl=rssurl,
                                     body_css_classes=body_css_classes))
        req.write(websearch_templates.tmpl_search_pagestart(ln=ln))
    else:
        # unknown non-HTML format: just send the detected content type
        req.content_type = content_type
        req.send_http_header()
def page_end(req, of="hb", ln=CFG_SITE_LANG, em=""):
    """End the page according to output format OF: e.g. close XML tags,
    add the HTML footer, etc."""
    if of == "id":
        # caller expects a (here empty) list of record IDs
        return []
    if of == "intbitset":
        return intbitset()
    if not req:
        # we were called from CLI; nothing to write
        return
    if of.startswith('h'):
        # HTML output: close the page body, then optionally the footer
        req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end
        if em == "" or EM_REPOSITORY["footer"] in em:
            req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req))
    return
def create_page_title_search_pattern_info(p, p1, p2, p3):
    """Create the search pattern bit for the page <title> web page
    HTML header.  Basically combine p and (p1,p2,p3) together so that
    the page header may be filled whether we are in the Simple Search
    or Advanced Search interface contexts.

    @param p: simple-search pattern; takes precedence when non-empty
    @param p1: first advanced-search pattern
    @param p2: second advanced-search pattern, appended after a space
    @param p3: third advanced-search pattern, appended after a space
    @return: combined pattern string (never None)
    """
    if p:
        return p
    # advanced search context: start from p1, guarding against None
    # (previously raised TypeError when p1 was None but p2/p3 were set):
    out = p1 or ""
    if p2:
        out += ' ' + p2
    if p3:
        out += ' ' + p3
    return out
def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG):
    """Produce a 'From Date' / 'Until Date' style selection box made of
    day, month and year <select> widgets.  Suitable for search options."""
    _ = gettext_set_language(ln)
    chunks = []
    # day selector:
    chunks.append("""<select name="%sd">""" % name)
    chunks.append("""<option value="">%s""" % _("any day"))
    for day in range(1, 32):
        chunks.append("""<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day))
    chunks.append("""</select>""")
    # month selector
    # (trailing space in "May " distinguishes short/long form of the month name):
    chunks.append("""<select name="%sm">""" % name)
    chunks.append("""<option value="">%s""" % _("any month"))
    months = [(1, _("January")), (2, _("February")), (3, _("March")),
              (4, _("April")), (5, _("May ")), (6, _("June")),
              (7, _("July")), (8, _("August")), (9, _("September")),
              (10, _("October")), (11, _("November")), (12, _("December"))]
    for mm, month in months:
        chunks.append("""<option value="%02d"%s>%s""" % (mm, is_selected(mm, selected_month), month.strip()))
    chunks.append("""</select>""")
    # year selector (this year and the preceding twenty):
    chunks.append("""<select name="%sy">""" % name)
    chunks.append("""<option value="">%s""" % _("any year"))
    this_year = int(time.strftime("%Y", time.localtime()))
    for year in range(this_year - 20, this_year + 1):
        chunks.append("""<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year))
    chunks.append("""</select>""")
    return "".join(chunks)
def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, aas,
                      ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3,
                      m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec,
                      action="", em=""):
    """Create search box for 'search again in the results page' functionality.

    @param cc: current collection name
    @param colls: list of collection names the user chose to search in
    @param em: CSV list of enabled modules; '' enables everything
    @return: HTML string of the search box (may be empty/headline-only
        when the search box module is disabled via EM)
    """
    if em != "" and EM_REPOSITORY["search_box"] not in em:
        # search box module is disabled; show at most the collection headline:
        if EM_REPOSITORY["body"] in em and cc != CFG_SITE_NAME:
            return '''
            <h1 class="headline">%(ccname)s</h1>''' % {'ccname' : cgi.escape(cc), }
        else:
            return ""
    # load the right message language
    _ = gettext_set_language(ln)
    # some computations
    cc_intl = get_coll_i18nname(cc, ln, False)
    cc_colID = get_colID(cc)
    colls_nicely_ordered = []
    if cfg_nicely_ordered_collection_list:
        colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln)
    else:
        colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln)
    # keep only named collections for the selection widgets:
    colls_nice = []
    for (cx, cx_printable) in colls_nicely_ordered:
        if not cx.startswith("Unnamed collection"):
            colls_nice.append({ 'value' : cx,
                                'text' : cx_printable
                              })
    coll_selects = []
    if colls and colls[0] != CFG_SITE_NAME:
        # some collections are defined, so print these first, and only then print 'add another collection' heading:
        for c in colls:
            if c:
                temp = []
                temp.append({ 'value' : CFG_SITE_NAME,
                              'text' : '*** %s ***' % _("any public collection")
                            })
                # this field is used to remove the current collection from the ones to be searched.
                temp.append({ 'value' : '',
                              'text' : '*** %s ***' % _("remove this collection")
                            })
                for val in colls_nice:
                    # print collection:
                    # BUGFIX: test the candidate collection itself
                    # (val['value']); the previous code tested the stale
                    # loop variable 'cx' left over from building
                    # colls_nice above, which could drop every option
                    # when the last iterated collection was unnamed.
                    if not val['value'].startswith("Unnamed collection"):
                        temp.append({ 'value' : val['value'],
                                      'text' : val['text'],
                                      # raw string so '\s'/'\-' are not treated as
                                      # (invalid) string escapes:
                                      'selected' : (c == re.sub(r"^[\s\-]*", "", val['value']))
                                    })
                coll_selects.append(temp)
        coll_selects.append([{ 'value' : '',
                               'text' : '*** %s ***' % _("add another collection")
                             }] + colls_nice)
    else: # we searched in CFG_SITE_NAME, so print 'any public collection' heading
        coll_selects.append([{ 'value' : CFG_SITE_NAME,
                               'text' : '*** %s ***' % _("any public collection")
                             }] + colls_nice)
    ## ranking methods
    ranks = [{
                'value' : '',
                'text' : "- %s %s -" % (_("OR").lower(), _("rank by")),
             }]
    for (code, name) in get_bibrank_methods(cc_colID, ln):
        # propose found rank methods:
        ranks.append({
                       'value' : code,
                       'text' : name,
                     })
    formats = get_available_output_formats(visible_only=True)
    # show collections in the search box? (not if there is only one
    # collection defined, and not if we are in light search)
    show_colls = True
    show_title = True
    if len(collection_reclist_cache.cache.keys()) == 1 or \
           aas == -1:
        show_colls = False
        show_title = False
    if cc == CFG_SITE_NAME:
        show_title = False
    if CFG_INSPIRE_SITE:
        show_title = False
    return websearch_templates.tmpl_search_box(
             ln = ln,
             aas = aas,
             cc_intl = cc_intl,
             cc = cc,
             ot = ot,
             sp = sp,
             action = action,
             fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID),
             f1 = f1,
             f2 = f2,
             f3 = f3,
             m1 = m1,
             m2 = m2,
             m3 = m3,
             p1 = p1,
             p2 = p2,
             p3 = p3,
             op1 = op1,
             op2 = op2,
             rm = rm,
             p = p,
             f = f,
             coll_selects = coll_selects,
             d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d,
             dt = dt,
             sort_fields = get_sortby_fields(ln=ln, colID=cc_colID),
             sf = sf,
             so = so,
             ranks = ranks,
             sc = sc,
             rg = rg,
             formats = formats,
             of = of,
             pl = pl,
             jrec = jrec,
             ec = ec,
             show_colls = show_colls,
             show_title = show_title and (em=="" or EM_REPOSITORY["body"] in em)
           )
def create_exact_author_browse_help_link(p=None, p1=None, p2=None, p3=None, f=None, f1=None, f2=None, f3=None,
                                         rm=None, cc=None, ln=None, jrec=None, rg=None, aas=0, action=""):
    """Create a link that helps switching from author to exact author
    while browsing; return the empty string when not applicable."""
    if action != 'browse':
        return ""
    search_fields = (f, f1, f2, f3)
    if 'author' not in search_fields and 'firstauthor' not in search_fields:
        return ""

    def _exactify(field):
        # map 'author'/'firstauthor' onto their exact-match variants:
        if field in ('author', 'firstauthor'):
            return 'exact' + field
        return field

    fe, f1e, f2e, f3e = [_exactify(field) for field in search_fields]
    browsed = f or f1
    link_name = 'exact first author' if browsed == 'firstauthor' else 'exact author'
    return websearch_templates.tmpl_exact_author_browse_help_link(
        p=p, p1=p1, p2=p2, p3=p3, f=fe, f1=f1e, f2=f2e, f3=f3e,
        rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg, aas=aas, action=action,
        link_name=link_name)
def create_navtrail_links(cc=CFG_SITE_NAME, aas=0, ln=CFG_SITE_LANG, self_p=1, tab=''):
    """Creates navigation trail links, i.e. links to collection
    ancestors (except Home collection).  If aas==1, then links to
    Advanced Search interfaces; otherwise Simple Search.
    """
    # ancestors of cc, excluding the Home collection:
    dads = [(dad, get_coll_i18nname(dad, ln, False))
            for dad in get_coll_ancestors(cc)
            if dad != CFG_SITE_NAME]
    # optionally append the collection itself at the end of the trail:
    if self_p and cc != CFG_SITE_NAME:
        dads.append((cc, get_coll_i18nname(cc, ln, False)))
    return websearch_templates.tmpl_navtrail_links(aas=aas, ln=ln, dads=dads)
def get_searchwithin_fields(ln='en', colID=None):
    """Retrieve the field codes/names shown in the 'search within'
    selection box for the collection ID COLID."""
    rows = None
    if colID:
        # per-collection configuration first:
        rows = run_sql("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff
                          WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id
                          ORDER BY cff.score DESC, f.name ASC""", (colID,))
    if not rows:
        # none configured for this collection: fall back to all fields:
        rows = run_sql("SELECT code,name FROM field ORDER BY name ASC")
    fields = [{
                'value' : '',
                'text' : get_field_i18nname("any field", ln, False)
              }]
    for field_code, field_name in rows:
        if field_code and field_code != "anyfield":
            fields.append({ 'value' : field_code,
                            'text' : get_field_i18nname(field_name, ln, False)
                          })
    return fields
def get_sortby_fields(ln='en', colID=None):
    """Retrieve the field codes/names shown in the 'sort by' selection
    box for the collection ID COLID, falling back first to the Home
    collection and then to all configured sort fields."""
    _ = gettext_set_language(ln)
    rows = None
    if colID:
        rows = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                          WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id
                          ORDER BY cff.score DESC, f.name ASC""", (colID,))
    if not rows:
        # no sort fields defined for this colID, try to take Home collection:
        rows = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                          WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id
                          ORDER BY cff.score DESC, f.name ASC""", (1,))
    if not rows:
        # no sort fields defined for the Home collection either, take
        # all sort fields defined wherever they are:
        rows = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff
                          WHERE cff.type='soo' AND cff.id_field=f.id
                          ORDER BY cff.score DESC, f.name ASC""",)
    fields = [{
                'value' : '',
                'text' : _("latest first")
              }]
    for field_code, field_name in rows:
        if field_code and field_code != "anyfield":
            fields.append({ 'value' : field_code,
                            'text' : get_field_i18nname(field_name, ln, False)
                          })
    return fields
def create_andornot_box(name='op', value='', ln='en'):
    """Return HTML code for the AND/OR/NOT selection box."""
    _ = gettext_set_language(ln)
    return """
  <select name="%s">
  <option value="a"%s>%s
  <option value="o"%s>%s
  <option value="n"%s>%s
  </select>
  """ % (name,
         is_selected('a', value), _("AND"),
         is_selected('o', value), _("OR"),
         is_selected('n', value), _("AND NOT"))
def create_matchtype_box(name='m', value='', ln='en'):
    """Return HTML code for the 'match type' selection box."""
    _ = gettext_set_language(ln)
    return """
  <select name="%s">
  <option value="a"%s>%s
  <option value="o"%s>%s
  <option value="e"%s>%s
  <option value="p"%s>%s
  <option value="r"%s>%s
  </select>
  """ % (name,
         is_selected('a', value), _("All of the words:"),
         is_selected('o', value), _("Any of the words:"),
         is_selected('e', value), _("Exact phrase:"),
         is_selected('p', value), _("Partial phrase:"),
         is_selected('r', value), _("Regular expression:"))
def is_selected(var, fld):
    """Return " selected" when VAR matches FLD, else "".  Useful for
    building HTML <select> boxes.  Two ints match on equality; other
    values match on their string representation; as a special case, a
    three-character FLD of the form 'wXY' matches VAR == 'XY'."""
    if type(var) is int and type(fld) is int:
        matched = var == fld
    elif str(var) == str(fld):
        matched = True
    elif fld and len(fld) == 3 and fld[0] == "w" and var == fld[1:]:
        matched = True
    else:
        matched = False
    return " selected" if matched else ""
def wash_colls(cc, c, split_colls=0, verbose=0):
    """Wash collection list by checking whether user has deselected
    anything under 'Narrow search'. Checks also if cc is a list or not.
    Return list of cc, colls_to_display, colls_to_search since the list
    of collections to display is different from that to search in.
    This is because users might have chosen 'split by collection'
    functionality.
    The behaviour of "collections to display" depends solely whether
    user has deselected a particular collection: e.g. if it started
    from 'Articles and Preprints' page, and deselected 'Preprints',
    then collection to display is 'Articles'. If he did not deselect
    anything, then collection to display is 'Articles & Preprints'.
    The behaviour of "collections to search in" depends on the
    'split_colls' parameter:
         * if is equal to 1, then we can wash the colls list down
           and search solely in the collection the user started from;
         * if is equal to 0, then we are splitting to the first level
           of collections, i.e. collections as they appear on the page
           we started to search from;
    The function raises exception
    InvenioWebSearchUnknownCollectionError
    if cc or one of c collections is not known.

    @param cc: current collection name, or list of candidate names
    @param c: collection name (or list of names) selected for searching
    @param split_colls: see above
    @param verbose: when true, an HTML debug trace is accumulated and
        returned as the last tuple element
    @return: (cc, colls_out_for_display, colls_out, hosted_colls_out, debug)
    """
    colls_out = []
    colls_out_for_display = []
    # list to hold the hosted collections to be searched and displayed
    hosted_colls_out = []
    debug = ""
    if verbose:
        debug += "<br />"
        debug += "<br />1) --- initial parameters ---"
        debug += "<br />cc : %s" % cc
        debug += "<br />c : %s" % c
        debug += "<br />"
    # check what type is 'cc':
    if type(cc) is list:
        # NOTE(review): if none of the candidates in the cc list is a
        # known collection, cc silently remains a list past this point
        # — TODO confirm callers guarantee at least one real name.
        for ci in cc:
            if collection_reclist_cache.cache.has_key(ci):
                # yes this collection is real, so use it:
                cc = ci
                break
    else:
        # check once if cc is real:
        if not collection_reclist_cache.cache.has_key(cc):
            if cc:
                raise InvenioWebSearchUnknownCollectionError(cc)
            else:
                cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection
    # check type of 'c' argument:
    if type(c) is list:
        colls = c
    else:
        colls = [c]
    if verbose:
        debug += "<br />2) --- after check for the integrity of cc and the being or not c a list ---"
        debug += "<br />cc : %s" % cc
        debug += "<br />c : %s" % c
        debug += "<br />"
    # remove all 'unreal' collections:
    colls_real = []
    for coll in colls:
        if collection_reclist_cache.cache.has_key(coll):
            colls_real.append(coll)
        else:
            if coll:
                raise InvenioWebSearchUnknownCollectionError(coll)
    colls = colls_real
    if verbose:
        debug += "<br />3) --- keeping only the real colls of c ---"
        debug += "<br />colls : %s" % colls
        debug += "<br />"
    # check if some real collections remain:
    if len(colls)==0:
        colls = [cc]
    if verbose:
        debug += "<br />4) --- in case no colls were left we use cc directly ---"
        debug += "<br />colls : %s" % colls
        debug += "<br />"
    # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll':
    res = run_sql("""SELECT c.name FROM collection AS c,
                                        collection_collection AS cc,
                                        collection AS ccc
                     WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                       AND ccc.name=%s AND cc.type='r'""", (cc,))
    # list that holds all the non restricted sons of cc that are also not hosted collections
    l_cc_nonrestricted_sons_and_nonhosted_colls = []
    res_hosted = run_sql("""SELECT c.name FROM collection AS c,
                                               collection_collection AS cc,
                                               collection AS ccc
                            WHERE c.id=cc.id_son AND cc.id_dad=ccc.id
                              AND ccc.name=%s AND cc.type='r'
                              AND (c.dbquery NOT LIKE 'hostedcollection:%%' OR c.dbquery IS NULL)""", (cc,))
    for row_hosted in res_hosted:
        l_cc_nonrestricted_sons_and_nonhosted_colls.append(row_hosted[0])
    l_cc_nonrestricted_sons_and_nonhosted_colls.sort()
    l_cc_nonrestricted_sons = []
    l_c = colls[:]
    for row in res:
        if not collection_restricted_p(row[0]):
            l_cc_nonrestricted_sons.append(row[0])
    # sort both lists so that they can be compared by simple equality:
    l_c.sort()
    l_cc_nonrestricted_sons.sort()
    if l_cc_nonrestricted_sons == l_c:
        colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc'
    # the following elif is a hack that preserves the above funcionality when we start searching from
    # the frontpage with some hosted collections deselected (either by default or manually)
    elif set(l_cc_nonrestricted_sons_and_nonhosted_colls).issubset(set(l_c)):
        colls_out_for_display = colls
        split_colls = 0
    else:
        colls_out_for_display = colls # nope, we need to display all 'colls' successively
    # remove duplicates:
    #colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
    #colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups)
    #colls_out_for_display = list(set(colls_out_for_display))
    #remove duplicates while preserving the order
    set_out = set()
    colls_out_for_display = [coll for coll in colls_out_for_display if coll not in set_out and not set_out.add(coll)]
    if verbose:
        debug += "<br />5) --- decide whether colls_out_for_diplay should be colls or is it sufficient for it to be cc; remove duplicates ---"
        debug += "<br />colls_out_for_display : %s" % colls_out_for_display
        debug += "<br />"
    # FIXME: The below quoted part of the code has been commented out
    # because it prevents searching in individual restricted daughter
    # collections when both parent and all its public daughter
    # collections were asked for, in addition to some restricted
    # daughter collections.  The removal was introduced for hosted
    # collections, so we may want to double check in this context.
    # the following piece of code takes care of removing collections whose ancestors are going to be searched anyway
    # list to hold the collections to be removed
    #colls_to_be_removed = []
    # first calculate the collections that can safely be removed
    #for coll in colls_out_for_display:
    #    for ancestor in get_coll_ancestors(coll):
    #        #if ancestor in colls_out_for_display: colls_to_be_removed.append(coll)
    #        if ancestor in colls_out_for_display and not is_hosted_collection(coll): colls_to_be_removed.append(coll)
    # secondly remove the collections
    #for coll in colls_to_be_removed:
    #    colls_out_for_display.remove(coll)
    if verbose:
        debug += "<br />6) --- remove collections that have ancestors about to be search, unless they are hosted ---"
        debug += "<br />colls_out_for_display : %s" % colls_out_for_display
        debug += "<br />"
    # calculate the hosted collections to be searched.
    if colls_out_for_display == [cc]:
        if is_hosted_collection(cc):
            hosted_colls_out.append(cc)
        else:
            for coll in get_coll_sons(cc):
                if is_hosted_collection(coll):
                    hosted_colls_out.append(coll)
    else:
        for coll in colls_out_for_display:
            if is_hosted_collection(coll):
                hosted_colls_out.append(coll)
    if verbose:
        debug += "<br />7) --- calculate the hosted_colls_out ---"
        debug += "<br />hosted_colls_out : %s" % hosted_colls_out
        debug += "<br />"
    # second, let us decide on collection splitting:
    if split_colls == 0:
        # type A - no sons are wanted
        colls_out = colls_out_for_display
    else:
        # type B - sons (first-level descendants) are wanted
        for coll in colls_out_for_display:
            coll_sons = get_coll_sons(coll)
            if coll_sons == []:
                colls_out.append(coll)
            else:
                for coll_son in coll_sons:
                    if not is_hosted_collection(coll_son):
                        colls_out.append(coll_son)
            #else:
            #    colls_out = colls_out + coll_sons
    # remove duplicates:
    #colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1))
    #colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups)
    #colls_out = list(set(colls_out))
    #remove duplicates while preserving the order
    set_out = set()
    colls_out = [coll for coll in colls_out if coll not in set_out and not set_out.add(coll)]
    if verbose:
        debug += "<br />8) --- calculate the colls_out; remove duplicates ---"
        debug += "<br />colls_out : %s" % colls_out
        debug += "<br />"
    # remove the hosted collections from the collections to be searched
    if hosted_colls_out:
        for coll in hosted_colls_out:
            try:
                colls_out.remove(coll)
            except ValueError:
                # in case coll was not found in colls_out
                pass
    if verbose:
        debug += "<br />9) --- remove the hosted_colls from the colls_out ---"
        debug += "<br />colls_out : %s" % colls_out
    return (cc, colls_out_for_display, colls_out, hosted_colls_out, debug)
def get_synonym_terms(term, kbr_name, match_type, use_memoise=False):
    """
    Return list of synonyms for TERM by looking in KBR_NAME in
    MATCH_TYPE style.

    @param term: search-time term or index-time term
    @type term: str
    @param kbr_name: knowledge base name
    @type kbr_name: str
    @param match_type: specifies how the term matches against the KBR
        before doing the lookup.  Could be `exact' (default),
        'leading_to_comma', `leading_to_number'.
    @type match_type: str
    @param use_memoise: can we memoise while doing lookups?
    @type use_memoise: bool
    @return: list of term synonyms
    @rtype: list of strings
    """
    ## exact match is the default: look up the whole term, keep nothing aside:
    lookup_key = term
    remainder = ''
    ## but maybe only the part before a comma/number should be looked up:
    if match_type == CFG_BIBINDEX_SYNONYM_MATCH_TYPE['leading_to_comma']:
        splitter = re.match(r'^(.*?)(\s*,.*)$', term)
        if splitter:
            lookup_key = splitter.group(1)
            remainder = splitter.group(2)
    elif match_type == CFG_BIBINDEX_SYNONYM_MATCH_TYPE['leading_to_number']:
        splitter = re.match(r'^(.*?)(\s*\d.*)$', term)
        if splitter:
            lookup_key = splitter.group(1)
            remainder = splitter.group(2)
    ## FIXME: workaround: escaping SQL wild-card signs, since KBR's
    ## exact search is doing LIKE query, so would match everything:
    lookup_key = lookup_key.replace('%', '\%')
    ## OK, now collect synonyms, deduplicating via dict keys:
    synonyms = {}
    for kbr_values in get_kbr_values(kbr_name,
                                     searchkey=lookup_key,
                                     searchtype='e',
                                     use_memoise=use_memoise):
        for kbr_value in kbr_values:
            synonyms[kbr_value + remainder] = 1
    ## return list of term synonyms:
    return synonyms.keys()
def wash_output_format(format):
    """Wash output format FORMAT.  Currently only prevents input like
    'of=9' for the backwards-compatible format that prints certain
    fields only (for this task, 'of=tm' is preferred): a digit prefix
    is only accepted for full 6-character MARC tag formats."""
    asks_for_marc_tags = str(format[0:3]).isdigit()
    if asks_for_marc_tags and len(format) != 6:
        # asked to print MARC tags, but not enough digits,
        # so let us switch back to HTML brief default:
        return 'hb'
    return format
def wash_pattern(p):
    """Wash pattern passed by URL.  Check for sanity of the wildcard by
    removing wildcards if they are appended to extremely short words
    (1-3 letters).  TODO: instead of this approximative treatment, it
    will be much better to introduce a temporal limit, e.g. to kill a
    query if it does not finish in 10 seconds.

    @param p: user-supplied search pattern string
    @return: washed pattern string
    """
    # strip accents:
    # p = strip_accents(p) # FIXME: when available, strip accents all the time
    # add leading/trailing whitespace for the two following wildcard-sanity checking regexps:
    p = " " + p + " "
    # replace spaces within quotes by __SPACE__ temporarily
    # (str methods instead of the Python-2-only string.replace /
    # string.strip functions, for Python 3 forward compatibility):
    p = re_pattern_single_quotes.sub(lambda x: "'" + x.group(1).replace(' ', '__SPACE__') + "'", p)
    p = re_pattern_double_quotes.sub(lambda x: "\"" + x.group(1).replace(' ', '__SPACE__') + "\"", p)
    p = re_pattern_regexp_quotes.sub(lambda x: "/" + x.group(1).replace(' ', '__SPACE__') + "/", p)
    # get rid of unquoted wildcards after spaces:
    p = re_pattern_wildcards_after_spaces.sub("\\1", p)
    # get rid of extremely short words (1-3 letters with wildcards):
    #p = re_pattern_short_words.sub("\\1", p)
    # replace back __SPACE__ by spaces:
    p = re_pattern_space.sub(" ", p)
    # replace special terms:
    p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p)
    # remove unnecessary whitespace:
    p = p.strip()
    # remove potentially wrong UTF-8 characters:
    p = wash_for_utf8(p)
    return p
def wash_field(f):
    """Wash field passed by URL."""
    if not f:
        return f
    # get rid of unnecessary whitespace and make it lowercase
    # (e.g. Author -> author) to better suit iPhone etc input modes:
    washed = f.strip().lower()
    # wash legacy 'f' field names, e.g. replace 'wau' or 'au' by
    # 'author', if applicable:
    if CFG_WEBSEARCH_FIELDS_CONVERT:
        washed = CFG_WEBSEARCH_FIELDS_CONVERT.get(washed, washed)
    return washed
def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0):
    """
    Take user-submitted date arguments D1 (full datetime string) or the
    (D1Y, D1M, D1D) year, month, day tuple, plus D2 or (D2Y, D2M, D2D),
    and return a pair of "YYYY-MM-DD HH:MM:SS" datetime strings suitable
    for time-restricted searching.

    When both D1 and (D1Y, D1M, D1D) parameters are present, precedence
    goes to D1.  Ditto for D2*.

    When (D1Y, D1M, D1D) are taken into account, missing values are
    completed towards the earliest possible start (e.g. 01), while
    missing (D2Y, D2M, D2D) values are completed towards the latest
    possible end (e.g. 12, 31).
    """
    # sanity checking: nothing selected at all -> empty values:
    if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and \
       d2 == "" and d2y == 0 and d2m == 0 and d2d == 0:
        return ("", "")

    def _build_datetext(full, year, month, day, year0, month0, day0):
        # Build one "YYYY-MM-DD HH:MM:SS" string; a full datetime string
        # takes precedence over the (year, month, day) triple, and
        # missing triple components fall back to year0/month0/day0.
        if full:
            return full
        text = ("%04d" % year) if year else year0
        text += ("-%02d" % month) if month else "-" + month0
        text += ("-%02d" % day) if day else "-" + day0
        return text + " 00:00:00"

    # starting date completes towards the earliest moment:
    datetext1 = _build_datetext(d1, d1y, d1m, d1d, "0000", "01", "01")
    # ending date completes towards the latest moment.  NOTE: perhaps we
    # should use max(day) in the given month, but for our querying it is
    # not needed -- 31 will always do:
    datetext2 = _build_datetext(d2, d2y, d2m, d2d, "9999", "12", "31")
    # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts:
    return (datetext1, datetext2)
def is_hosted_collection(coll):
    """Check whether collection COLL is a hosted one, i.e. whether its
    dbquery starts with 'hostedcollection:'.

    Returns True if it is; False if it is not, if the result is empty,
    or if the query failed."""
    res = run_sql("SELECT dbquery FROM collection WHERE name=%s", (coll, ))
    try:
        # missing row / NULL dbquery both end up in the except branch:
        dbquery = res[0][0]
        return dbquery.startswith("hostedcollection:")
    except:
        return False
def get_colID(c):
    """Return collection ID for collection name C, or None if no match found."""
    res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1)
    if not res:
        return None
    return res[0][0]
def get_coll_normalised_name(c):
    """Return the normalised collection name (case sensitive) for
    collection name C (case insensitive).

    Returns None if no match is found."""
    try:
        rows = run_sql("SELECT name FROM collection WHERE name=%s", (c,))
        return rows[0][0]
    except:
        # no such collection (or query failed):
        return None
def get_coll_ancestors(coll):
    """Return the list of ancestors for collection 'coll', root-most first."""
    coll_ancestors = []
    current = coll
    while True:
        res = run_sql("""SELECT c.name FROM collection AS c
                          LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad
                          LEFT JOIN collection AS ccc ON ccc.id=cc.id_son
                          WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""",
                      (current,))
        if not res:
            # no more parents: we reached the root
            break
        parent_name = res[0][0]
        # prepend, so the final list reads from root down to nearest parent:
        coll_ancestors.insert(0, parent_name)
        current = parent_name
    return coll_ancestors
def get_coll_sons(coll, type='r', public_only=1):
    """Return a list of sons (first-level descendants) of type 'type' for
    collection 'coll'.

    If public_only, then return only non-restricted son collections.
    """
    query = ("SELECT c.name FROM collection AS c "
             "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "
             "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "
             "WHERE cc.type=%s AND ccc.name=%s"
             " ORDER BY cc.score DESC")
    rows = run_sql(query, (type, coll))
    # optionally filter out restricted collections:
    return [row[0] for row in rows
            if not public_only or not collection_restricted_p(row[0])]
class CollectionAllChildrenDataCacher(DataCacher):
    """Cache for all children of a collection (regular & virtual, public & private)"""
    def __init__(self):
        def cache_filler():
            # Fill the cache: map every known collection name (taken from
            # collection_reclist_cache) to the flat list of ALL its
            # descendants, any depth, both 'r'egular and 'v'irtual links,
            # restricted collections included (public_only=0).
            def get_all_children(coll, type='r', public_only=1):
                """Return a list of all children of type 'type' for collection 'coll'.
                If public_only, then return only non-restricted child collections.
                If type='*', then return both regular and virtual collections.
                """
                children = []
                if type == '*':
                    sons = get_coll_sons(coll, 'r', public_only) + get_coll_sons(coll, 'v', public_only)
                else:
                    sons = get_coll_sons(coll, type, public_only)
                for child in sons:
                    children.append(child)
                    # depth-first recursion into each son's subtree:
                    children.extend(get_all_children(child, type, public_only))
                return children
            ret = {}
            collections = collection_reclist_cache.cache.keys()
            for collection in collections:
                ret[collection] = get_all_children(collection, '*', public_only=0)
            return ret
        def timestamp_verifier():
            # the cache is stale whenever either collection table changed:
            return max(get_table_update_time('collection'), get_table_update_time('collection_collection'))
        DataCacher.__init__(self, cache_filler, timestamp_verifier)
# Instantiate the all-children cache at module import time.  On first
# import the name 'collection_allchildren_cache' does not exist yet, so
# the attribute access raises NameError (a subclass of Exception) and the
# cache gets created; on re-import/reload an unusable cache (is_ok_p
# false) is re-created the same way.
try:
    if not collection_allchildren_cache.is_ok_p:
        raise Exception
except Exception:
    collection_allchildren_cache = CollectionAllChildrenDataCacher()
def get_collection_allchildren(coll, recreate_cache_if_needed=True):
    """Return the list of all children of a collection.

    Unknown collections yield an empty list."""
    if recreate_cache_if_needed:
        collection_allchildren_cache.recreate_cache_if_needed()
    return collection_allchildren_cache.cache.get(coll, [])
def get_coll_real_descendants(coll, type='_', get_hosted_colls=True):
    """Return a list of all descendants of collection 'coll' that are
    defined by a 'dbquery'.

    IOW, we need to decompose compound collections like "A & B" into
    "A" and "B" provided that "A & B" has no associated database query
    defined.
    """
    descendants = []
    res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c
                     LEFT JOIN collection_collection AS cc ON c.id=cc.id_son
                     LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad
                     WHERE ccc.name=%s AND cc.type LIKE %s ORDER BY cc.score DESC""",
                  (coll, type,))
    for name, dbquery in res:
        if not dbquery:
            # 'composed' collection, so recurse into it:
            descendants.extend(get_coll_real_descendants(name))
        elif get_hosted_colls or not dbquery.startswith("hostedcollection:"):
            # 'real' collection (optionally skipping hosted ones):
            descendants.append(name)
    return descendants
def browse_pattern_phrases(req, colls, p, f, rg, ln=CFG_SITE_LANG):
    """Return either bibliographic phrases or words indexes.

    Returns a list of [phrase, nbhits] pairs near pattern P in field F,
    restricted to (or falling back from) the collections COLLS; RG
    controls how many neighbouring terms are fetched.  Returns [] when
    nothing at all can be found."""
    ## is p enclosed in quotes? (coming from exact search)
    if p.startswith('"') and p.endswith('"'):
        p = p[1:-1]
    p_orig = p
    ## okay, "real browse" follows:
    ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = string.split(p, ":", 1)
    ## do we search in words indexes?
    # FIXME uncomment this
    #if not f:
    #    return browse_in_bibwords(req, p, f)
    # union of all record IDs in the selected collections:
    coll_hitset = intbitset()
    for coll_name in colls:
        coll_hitset |= get_collection_reclist(coll_name)
    index_id = get_index_id_from_field(f)
    if index_id != 0:
        # dedicated phrase index available: use it directly
        browsed_phrases_in_colls = get_nearest_terms_in_idxphrase_with_collection(p, index_id, rg/2, rg/2, coll_hitset)
    else:
        # no phrase index: browse the bibxxx tables instead
        browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1)
        while not browsed_phrases:
            # try again and again with shorter and shorter pattern:
            try:
                p = p[:-1]
                browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1)
            except:
                # probably there are no hits at all:
                #req.write(_("No values found."))
                return []
        ## try to check hits in these particular collection selection:
        browsed_phrases_in_colls = []
        if 0:
            # NOTE(review): dead code kept deliberately disabled -- it would
            # count per-collection hits for every phrase via search_pattern.
            for phrase in browsed_phrases:
                phrase_hitset = intbitset()
                phrase_hitsets = search_pattern("", phrase, f, 'e')
                for coll in colls:
                    phrase_hitset.union_update(phrase_hitsets[coll])
                if len(phrase_hitset) > 0:
                    # okay, this phrase has some hits in colls, so add it:
                    browsed_phrases_in_colls.append([phrase, len(phrase_hitset)])
        ## were there hits in collections?
        if browsed_phrases_in_colls == []:
            if browsed_phrases != []:
                #write_warning(req, """<p>No match close to <em>%s</em> found in given collections.
                #Please try different term.<p>Displaying matches in any collection...""" % p_orig)
                ## try to get nbhits for these phrases in any collection:
                for phrase in browsed_phrases:
                    nbhits = get_nbhits_in_bibxxx(phrase, f, coll_hitset)
                    if nbhits > 0:
                        browsed_phrases_in_colls.append([phrase, nbhits])
    return browsed_phrases_in_colls
def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG):
    """Display either bibliographic phrases or words indexes."""
    # load the right message language:
    _ = gettext_set_language(ln)
    phrases = browse_pattern_phrases(req, colls, p, f, rg, ln)
    if not phrases:
        req.write(_("No values found."))
        return
    # display results now:
    out = websearch_templates.tmpl_browse_pattern(
        f=f,
        fn=get_field_i18nname(get_field_name(f) or f, ln, False),
        ln=ln,
        browsed_phrases_in_colls=phrases,
        colls=colls,
        rg=rg,
    )
    req.write(out)
    return
def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG):
    """Browse inside words indexes."""
    if not p:
        return
    _ = gettext_set_language(ln)
    # copy the request arguments, forcing the 'search' action:
    urlargd = dict(req.argd)
    urlargd['action'] = 'search'
    nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln,
                                           intro_text_p=0)
    req.write(websearch_templates.tmpl_search_in_bibwords(
        p=p,
        f=f,
        ln=ln,
        nearest_box=nearest_box))
    return
def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
    """Search for complex pattern 'p' within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       The function uses multi-stage searching algorithm in case of no
       exact match found.  See the Search Internals document for
       detailed description.

       The 'ap' argument governs whether an alternative patterns are to
       be used in case there is no direct hit for (p,f,m).  For
       example, whether to replace non-alphanumeric characters by
       spaces if it would give some hits.  See the Search Internals
       document for detailed description.  (ap=0 forbids the
       alternative pattern usage, ap=1 permits it.)
       'ap' is also internally used for allowing hidden tag search
       (for requests coming from webcoll, for example). In this
       case ap=-9

       The 'of' argument governs whether to print or not some
       information to the user in case of no match found.  (Usually it
       prints the information in case of HTML formats, otherwise it's
       silent).

       The 'verbose' argument controls the level of debugging information
       to be printed (0=least, 9=most).

       All the parameters are assumed to have been previously washed.

       This function is suitable as a mid-level API.
    """
    _ = gettext_set_language(ln)
    hitset_empty = intbitset()
    # sanity check:
    if not p:
        hitset_full = intbitset(trailing_bits=1)
        hitset_full.discard(0)
        # no pattern, so return all universe
        return hitset_full
    # search stage 1: break up arguments into basic search units:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units = create_basic_search_units(req, p, f, m, of)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        write_warning("Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units)), req=req)
        write_warning("Search stage 1: execution took %.2f seconds." % (t2 - t1), req=req)
    # search stage 2: do search for each search unit and verify hit presence:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units_hitsets = []
    # prepare hiddenfield-related: myhiddens lists the MARC tags that must
    # yield no hits unless the user may see hidden tags
    myhiddens = CFG_BIBFORMAT_HIDDEN_TAGS
    can_see_hidden = False
    if req:
        user_info = collect_user_info(req)
        can_see_hidden = user_info.get('precached_canseehiddenmarctags', False)
    if not req and ap == -9: # special request, coming from webcoll
        can_see_hidden = True
    if can_see_hidden:
        myhiddens = []
    if CFG_INSPIRE_SITE and of.startswith('h'):
        # fulltext/caption search warnings for INSPIRE:
        fields_to_be_searched = [f for o, p, f, m in basic_search_units]
        if 'fulltext' in fields_to_be_searched:
            write_warning( _("Warning: full-text search is only available for a subset of papers mostly from %(x_range_from_year)s-%(x_range_to_year)s.") % \
                           {'x_range_from_year': '2006',
                            'x_range_to_year': '2012'}, req=req)
        elif 'caption' in fields_to_be_searched:
            write_warning(_("Warning: figure caption search is only available for a subset of papers mostly from %(x_range_from_year)s-%(x_range_to_year)s.") % \
                          {'x_range_from_year': '2008',
                           'x_range_to_year': '2012'}, req=req)
    for idx_unit in xrange(len(basic_search_units)):
        bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
        if bsu_f and len(bsu_f) < 2:
            # one-letter fields are not valid indexes: fall back to
            # word search in all fields
            if of.startswith("h"):
                write_warning(_("There is no index %s. Searching for %s in all fields." % (bsu_f, bsu_p)), req=req)
            bsu_f = ''
            bsu_m = 'w'
            if of.startswith("h") and verbose:
                write_warning(_('Instead searching %s.' % str([bsu_o, bsu_p, bsu_f, bsu_m])), req=req)
        try:
            basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m, wl)
        except InvenioWebSearchWildcardLimitError, excp:
            # wildcard limit hit: keep the partial result and warn
            basic_search_unit_hitset = excp.res
            if of.startswith("h"):
                write_warning(_("Search term too generic, displaying only partial results..."), req=req)
        # FIXME: print warning if we use native full-text indexing
        if bsu_f == 'fulltext' and bsu_m != 'w' and of.startswith('h') and not CFG_SOLR_URL:
            write_warning(_("No phrase index available for fulltext yet, looking for word combination..."), req=req)
        #check that the user is allowed to search with this tag
        #if he/she tries it
        if bsu_f and len(bsu_f) > 1 and bsu_f[0].isdigit() and bsu_f[1].isdigit():
            for htag in myhiddens:
                ltag = len(htag)
                samelenfield = bsu_f[0:ltag]
                if samelenfield == htag: #user searches by a hidden tag
                    #we won't show you anything..
                    basic_search_unit_hitset = intbitset()
                    if verbose >= 9 and of.startswith("h"):
                        write_warning("Pattern %s hitlist omitted since \
                                       it queries in a hidden tag %s" %
                                      (cgi.escape(repr(bsu_p)), repr(myhiddens)), req=req)
                    display_nearest_terms_box = False #..and stop spying, too.
        if verbose >= 9 and of.startswith("h"):
            write_warning("Search stage 1: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset), req=req)
        if len(basic_search_unit_hitset) > 0 or \
           ap<1 or \
           bsu_o=="|" or \
           ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"):
            # stage 2-1: this basic search unit is retained, since
            # either the hitset is non-empty, or the approximate
            # pattern treatment is switched off, or the search unit
            # was joined by an OR operator to preceding/following
            # units so we do not require that it exists
            basic_search_units_hitsets.append(basic_search_unit_hitset)
        else:
            # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern:
            if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p) and bsu_f != 'refersto' and bsu_f != 'citedby':
                if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query?
                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p)
                else: # it is WRD query
                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p)
                if verbose and of.startswith('h') and req:
                    write_warning("Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m)), req=req)
                # NOTE: recurse with req=None so the sub-search stays silent
                basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln, wl=wl)
                if len(basic_search_unit_hitset) > 0:
                    # we retain the new unit instead
                    if of.startswith('h'):
                        write_warning(_("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \
                                      {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>",
                                       'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"}, req=req)
                    basic_search_units[idx_unit][1] = bsu_pn
                    basic_search_units_hitsets.append(basic_search_unit_hitset)
                else:
                    # stage 2-3: no hits found either, propose nearest indexed terms:
                    if of.startswith('h') and display_nearest_terms_box:
                        if req:
                            if bsu_f == "recid":
                                write_warning(_("Requested record does not seem to exist."), req=req)
                            else:
                                write_warning(create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln), req=req)
                    return hitset_empty
            else:
                # stage 2-3: no hits found either, propose nearest indexed terms:
                if of.startswith('h') and display_nearest_terms_box:
                    if req:
                        if bsu_f == "recid":
                            write_warning(_("Requested record does not seem to exist."), req=req)
                        else:
                            write_warning(create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln), req=req)
                return hitset_empty
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        for idx_unit in range(0, len(basic_search_units)):
            write_warning("Search stage 2: basic search unit %s gave %d hits." %
                          (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit])), req=req)
        write_warning("Search stage 2: execution took %.2f seconds." % (t2 - t1), req=req)
    # search stage 3: apply boolean query for each search unit:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    # let the initial set be the complete universe:
    hitset_in_any_collection = intbitset(trailing_bits=1)
    hitset_in_any_collection.discard(0)
    for idx_unit in xrange(len(basic_search_units)):
        this_unit_operation = basic_search_units[idx_unit][0]
        this_unit_hitset = basic_search_units_hitsets[idx_unit]
        if this_unit_operation == '+':
            hitset_in_any_collection.intersection_update(this_unit_hitset)
        elif this_unit_operation == '-':
            hitset_in_any_collection.difference_update(this_unit_hitset)
        elif this_unit_operation == '|':
            hitset_in_any_collection.union_update(this_unit_hitset)
        else:
            if of.startswith("h"):
                write_warning("Invalid set operation %s." % cgi.escape(this_unit_operation), "Error", req=req)
    if len(hitset_in_any_collection) == 0:
        # no hits found, propose alternative boolean query:
        if of.startswith('h') and display_nearest_terms_box:
            nearestterms = []
            for idx_unit in range(0, len(basic_search_units)):
                bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
                if bsu_p.startswith("%") and bsu_p.endswith("%"):
                    bsu_p = "'" + bsu_p[1:-1] + "'"
                bsu_nbhits = len(basic_search_units_hitsets[idx_unit])
                # create a similar query, but with the basic search unit only
                argd = {}
                argd.update(req.argd)
                argd['p'] = bsu_p
                argd['f'] = bsu_f
                nearestterms.append((bsu_p, bsu_nbhits, argd))
            text = websearch_templates.tmpl_search_no_boolean_hits(
                ln=ln, nearestterms=nearestterms)
            write_warning(text, req=req)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        write_warning("Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection), req=req)
        write_warning("Search stage 3: execution took %.2f seconds." % (t2 - t1), req=req)
    return hitset_in_any_collection
def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
    """Search for complex pattern 'p' containing parenthesis within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       For more details on the parameters see 'search_pattern'
    """
    _ = gettext_set_language(ln)
    spires_syntax_converter = SpiresToInvenioSyntaxConverter()
    spires_syntax_query = False
    # if the pattern uses SPIRES search syntax, convert it to Invenio syntax
    if spires_syntax_converter.is_applicable(p):
        spires_syntax_query = True
        p = spires_syntax_converter.convert_query(p)
    # sanity check: do not call parenthesised parser for search terms
    # like U(1) but still call it for searches like ('U(1)' | 'U(2)'):
    if not re_pattern_parens.search(re_pattern_parens_quotes.sub('_', p)):
        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
    # Try searching with parentheses
    try:
        parser = SearchQueryParenthesisedParser()
        # get a hitset with all recids
        result_hitset = intbitset(trailing_bits=1)
        # parse the query. The result is list of [op1, expr1, op2, expr2, ..., opN, exprN]
        parsing_result = parser.parse_query(p)
        if verbose and of.startswith("h"):
            write_warning("Search stage 1: search_pattern_parenthesised() searched %s." % repr(p), req=req)
            write_warning("Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result), req=req)
        # go through every pattern
        # calculate hitset for it
        # combine pattern's hitset with the result using the corresponding operator
        for index in xrange(0, len(parsing_result)-1, 2 ):
            current_operator = parsing_result[index]
            current_pattern = parsing_result[index+1]
            if CFG_INSPIRE_SITE and spires_syntax_query:
                # setting ap=0 to turn off approximate matching for 0 results.
                # Doesn't work well in combinations.
                # FIXME: The right fix involves collecting statuses for each
                #        hitset, then showing a nearest terms box exactly once,
                #        outside this loop.
                ap = 0
                display_nearest_terms_box = False
            # obtain a hitset for the current pattern
            current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
            # combine the current hitset with resulting hitset using the current operator
            if current_operator == '+':
                result_hitset = result_hitset & current_hitset
            elif current_operator == '-':
                result_hitset = result_hitset - current_hitset
            elif current_operator == '|':
                result_hitset = result_hitset | current_hitset
            else:
                assert False, "Unknown operator in search_pattern_parenthesised()"
        return result_hitset
    # If searching with parentheses fails, perform search ignoring parentheses
    except SyntaxError:
        write_warning(_("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again."), req=req)
        # remove the parentheses in the query. Current implementation removes all the parentheses,
        # but it could be improved to remove only those that are not inside quotes
        p = p.replace('(', ' ')
        p = p.replace(')', ' ')
        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
def search_unit(p, f=None, m=None, wl=0, ignore_synonyms=None):
    """Search for basic search unit defined by pattern 'p' and field
       'f' and matching type 'm'.  Return hitset of recIDs.

       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a ``basic search unit'' so that it
       is searched as such and is not broken up in any way.  Only
       wildcard and span queries are being detected inside 'p'.

       If CFG_WEBSEARCH_SYNONYM_KBRS is set and we are searching in
       one of the indexes that has defined runtime synonym knowledge
       base, then look up there and automatically enrich search
       results with results for synonyms.

       In case the wildcard limit (wl) is greater than 0 and this limit
       is reached an InvenioWebSearchWildcardLimitError will be raised.
       In case you want to call this function with no limit for the
       wildcard queries, wl should be 0.

       Parameter 'ignore_synonyms' is a list of terms for which we
       should not try to further find a synonym.

       This function is suitable as a low-level API.
    """
    ## create empty output results set:
    hitset = intbitset()
    if not p: # sanity checking
        return hitset
    tokenizer = get_field_tokenizer_type(f)
    hitset_cjk = intbitset()
    if tokenizer == "BibIndexCJKTokenizer":
        # CJK text has no word delimiters: search character by character
        # and merge the per-character hitsets into the final result below
        if is_there_any_CJK_character_in_text(p):
            cjk_tok = BibIndexCJKTokenizer()
            chars = cjk_tok.tokenize_for_words(p)
            for char in chars:
                hitset_cjk |= search_unit_in_bibwords(char, f, m, wl)
    ## eventually look up runtime synonyms:
    hitset_synonyms = intbitset()
    if CFG_WEBSEARCH_SYNONYM_KBRS.has_key(f):
        if ignore_synonyms is None:
            ignore_synonyms = []
        # remember 'p' so the recursive calls below do not loop on it
        ignore_synonyms.append(p)
        for p_synonym in get_synonym_terms(p,
                                           CFG_WEBSEARCH_SYNONYM_KBRS[f][0],
                                           CFG_WEBSEARCH_SYNONYM_KBRS[f][1]):
            if p_synonym != p and \
               not p_synonym in ignore_synonyms:
                hitset_synonyms |= search_unit(p_synonym, f, m, wl,
                                               ignore_synonyms)
    ## look up hits:
    if f == 'fulltext' and get_idx_indexer('fulltext') == 'SOLR' and CFG_SOLR_URL:
        # redirect to Solr
        try:
            return search_unit_in_solr(p, f, m)
        except:
            # There were troubles with getting full-text search
            # results from Solr.  Let us alert the admin of these
            # problems and let us simply return empty results to the
            # end user.
            register_exception()
            return hitset
    elif f == 'fulltext' and get_idx_indexer('fulltext') == 'XAPIAN' and CFG_XAPIAN_ENABLED:
        # redirect to Xapian
        try:
            return search_unit_in_xapian(p, f, m)
        except:
            # There were troubles with getting full-text search
            # results from Xapian.  Let us alert the admin of these
            # problems and let us simply return empty results to the
            # end user.
            register_exception()
            return hitset
    if f == 'datecreated':
        hitset = search_unit_in_bibrec(p, p, 'c')
    elif f == 'datemodified':
        hitset = search_unit_in_bibrec(p, p, 'm')
    elif f == 'refersto':
        # delegate to the citation-graph 'refersto' search:
        hitset = search_unit_refersto(p)
    elif f == 'rawref':
        from invenio.refextract_api import search_from_reference
        # turn a raw reference string into a (field, pattern) pair and
        # search that instead:
        field, pattern = search_from_reference(p)
        return search_unit(pattern, field)
    elif f == 'citedby':
        # delegate to the citation-graph 'citedby' search:
        hitset = search_unit_citedby(p)
    elif f == 'collection':
        # we are doing search by the collection name or MARC field
        hitset = search_unit_collection(p, m, wl=wl)
    elif f == 'tag':
        module_found = False
        try:
            from invenio.modules.tags.search_units import search_unit_in_tags
            module_found = True
        except:
            # WebTag module is disabled, so ignore 'tag' selector
            pass
        if module_found:
            return search_unit_in_tags(p)
    elif m == 'a' or m == 'r':
        # we are doing either phrase search or regexp search
        if f == 'fulltext':
            # FIXME: workaround for not having phrase index yet
            return search_pattern(None, p, f, 'w')
        index_id = get_index_id_from_field(f)
        if index_id != 0:
            if m == 'a' and index_id in get_idxpair_field_ids():
                #for exact match on the admin configured fields we are searching in the pair tables
                hitset = search_unit_in_idxpairs(p, f, m, wl)
            else:
                hitset = search_unit_in_idxphrases(p, f, m, wl)
        else:
            hitset = search_unit_in_bibxxx(p, f, m, wl)
            # if not hitset and m == 'a' and (p[0] != '%' and p[-1] != '%'):
            #     #if we have no results by doing exact matching, do partial matching
            #     #for removing the distinction between simple and double quotes
            #     hitset = search_unit_in_bibxxx('%' + p + '%', f, m, wl)
    elif p.startswith("cited:"):
        # we are doing search by the citation count
        hitset = search_unit_by_times_cited(p[6:])
    else:
        # we are doing bibwords search by default
        hitset = search_unit_in_bibwords(p, f, m, wl=wl)
    ## merge synonym results and return total:
    hitset |= hitset_synonyms
    hitset |= hitset_cjk
    return hitset
def get_idxpair_field_ids():
    """Return the list of ids for the fields that idxPAIRS should be used on."""
    index_dict = dict(run_sql("SELECT name, id FROM idxINDEX"))
    wanted_fields = cfg['CFG_WEBSEARCH_IDXPAIRS_FIELDS']
    return [index_id for field, index_id in index_dict.items()
            if field in wanted_fields]
def search_unit_in_bibwords(word, f, m=None, decompress=zlib.decompress, wl=0):
    """Searches for 'word' inside bibwordsX table for field 'f' and returns hitset of recIDs.

    Supports '*' truncation (mapped to SQL '%'), 'a->b' span queries and
    'N+' field-count queries.  Raises InvenioWebSearchWildcardLimitError
    (carrying the partial result) when a positive wildcard limit 'wl' is
    reached."""
    set = intbitset() # will hold output result set
    set_used = 0 # not-yet-used flag, to be able to circumvent set operations
    limit_reached = 0 # flag for knowing if the query limit has been reached
    # if no field is specified, search in the global index.
    f = f or 'anyfield'
    index_id = get_index_id_from_field(f)
    if index_id:
        bibwordsX = "idxWORD%02dF" % index_id
        stemming_language = get_index_stemming_language(index_id)
    else:
        return intbitset() # word index f does not exist
    # wash 'word' argument and run query:
    if f.endswith('count') and word.endswith('+'):
        # field count query of the form N+ so transform N+ to N->99999:
        word = word[:-1] + '->99999'
    word = string.replace(word, '*', '%') # we now use '*' as the truncation character
    words = string.split(word, "->", 1) # check for span query
    if len(words) == 2:
        # span query: wash, optionally stem, then SQL BETWEEN both ends
        word0 = re_word.sub('', words[0])
        word1 = re_word.sub('', words[1])
        if stemming_language:
            word0 = lower_index_term(word0)
            word1 = lower_index_term(word1)
            word0 = stem(word0, stemming_language)
            word1 = stem(word1, stemming_language)
        word0_washed = wash_index_term(word0)
        word1_washed = wash_index_term(word1)
        if f.endswith('count'):
            # field count query; convert to integers in order
            # to have numerical behaviour for 'BETWEEN n1 AND n2' query
            try:
                word0_washed = int(word0_washed)
                word1_washed = int(word1_washed)
            except ValueError:
                pass
        try:
            res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX,
                                     (word0_washed, word1_washed), wildcard_limit = wl)
        except InvenioDbQueryWildcardLimitError, excp:
            res = excp.res
            limit_reached = 1 # set the limit reached flag to true
    else:
        if f == 'journal':
            pass # FIXME: quick hack for the journal index
        else:
            word = re_word.sub('', word)
            if stemming_language:
                word = lower_index_term(word)
                word = stem(word, stemming_language)
        if string.find(word, '%') >= 0: # do we have wildcard in the word?
            if f == 'journal':
                # FIXME: quick hack for the journal index
                # FIXME: we can run a sanity check here for all indexes
                res = ()
            else:
                try:
                    res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX,
                                             (wash_index_term(word),), wildcard_limit = wl)
                except InvenioDbQueryWildcardLimitError, excp:
                    res = excp.res
                    limit_reached = 1 # set the limit reached flag to true
        else:
            res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX,
                          (wash_index_term(word),))
    # fill the result set:
    for word, hitlist in res:
        hitset_bibwrd = intbitset(hitlist)
        # add the results:
        if set_used:
            set.union_update(hitset_bibwrd)
        else:
            # first row: adopt its hitset directly, avoiding a union
            set = hitset_bibwrd
            set_used = 1
    #check to see if the query limit was reached
    if limit_reached:
        #raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(set)
    # okay, return result set:
    return set
def search_unit_in_idxpairs(p, f, type, wl=0):
"""Searches for pair 'p' inside idxPAIR table for field 'f' and
returns hitset of recIDs found."""
limit_reached = 0 # flag for knowing if the query limit has been reached
do_exact_search = True # flag to know when it makes sense to try to do exact matching
result_set = intbitset()
#determine the idxPAIR table to read from
index_id = get_index_id_from_field(f)
if not index_id:
return intbitset()
stemming_language = get_index_stemming_language(index_id)
pairs_tokenizer = BibIndexDefaultTokenizer(stemming_language)
idxpair_table_washed = wash_table_column_name("idxPAIR%02dF" % index_id)
if p.startswith("%") and p.endswith("%"):
p = p[1:-1]
original_pattern = p
p = string.replace(p, '*', '%') # we now use '*' as the truncation character
queries_releated_vars = [] # contains tuples of (query_addons, query_params, use_query_limit)
#is it a span query?
ps = string.split(p, "->", 1)
if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
#so we are dealing with a span query
pairs_left = pairs_tokenizer.tokenize_for_pairs(ps[0])
pairs_right = pairs_tokenizer.tokenize_for_pairs(ps[1])
if not pairs_left or not pairs_right:
# we are not actually dealing with pairs but with words
return search_unit_in_bibwords(original_pattern, f, type, wl)
elif len(pairs_left) != len(pairs_right):
# it is kind of hard to know what the user actually wanted
# we have to do: foo bar baz -> qux xyz, so let's swith to phrase
return search_unit_in_idxphrases(original_pattern, f, type, wl)
elif len(pairs_left) > 1 and \
len(pairs_right) > 1 and \
pairs_left[:-1] != pairs_right[:-1]:
# again we have something like: foo bar baz -> abc xyz qux
# so we'd better switch to phrase
return search_unit_in_idxphrases(original_pattern, f, type, wl)
else:
# finally, we can treat the search using idxPairs
# at this step we have either: foo bar -> abc xyz
# or foo bar abc -> foo bar xyz
queries_releated_vars = [("BETWEEN %s AND %s", (pairs_left[-1], pairs_right[-1]), True)]
for pair in pairs_left[:-1]:# which should be equal with pairs_right[:-1]
queries_releated_vars.append(("= %s", (pair, ), False))
do_exact_search = False # no exact search for span queries
elif string.find(p, '%') > -1:
#tokenizing p will remove the '%', so we have to make sure it stays
replacement = 'xxxxxxxxxx' #hopefuly this will not clash with anything in the future
p = string.replace(p, '%', replacement)
pairs = pairs_tokenizer.tokenize_for_pairs(p)
if not pairs:
# we are not actually dealing with pairs but with words
return search_unit_in_bibwords(original_pattern, f, type, wl)
queries_releated_vars = []
for pair in pairs:
if string.find(pair, replacement) > -1:
pair = string.replace(pair, replacement, '%') #we replace back the % sign
queries_releated_vars.append(("LIKE %s", (pair, ), True))
else:
queries_releated_vars.append(("= %s", (pair, ), False))
do_exact_search = False
else:
#normal query
pairs = pairs_tokenizer.tokenize_for_pairs(p)
if not pairs:
# we are not actually dealing with pairs but with words
return search_unit_in_bibwords(original_pattern, f, type, wl)
queries_releated_vars = []
for pair in pairs:
queries_releated_vars.append(("= %s", (pair, ), False))
first_results = 1 # flag to know if it's the first set of results or not
for query_var in queries_releated_vars:
query_addons = query_var[0]
query_params = query_var[1]
use_query_limit = query_var[2]
if use_query_limit:
try:
res = run_sql_with_limit("SELECT term, hitlist FROM %s WHERE term %s" \
% (idxpair_table_washed, query_addons), query_params, wildcard_limit=wl) #kwalitee:disable=sql
except InvenioDbQueryWildcardLimitError, excp:
res = excp.res
limit_reached = 1 # set the limit reached flag to true
else:
res = run_sql("SELECT term, hitlist FROM %s WHERE term %s" \
% (idxpair_table_washed, query_addons), query_params) #kwalitee:disable=sql
if not res:
return intbitset()
for pair, hitlist in res:
hitset_idxpairs = intbitset(hitlist)
if first_results:
result_set = hitset_idxpairs
first_results = 0
else:
result_set.intersection_update(hitset_idxpairs)
#check to see if the query limit was reached
if limit_reached:
#raise an exception, so we can print a nice message to the user
raise InvenioWebSearchWildcardLimitError(result_set)
# check if we need to eliminate the false positives
if cfg['CFG_WEBSEARCH_IDXPAIRS_EXACT_SEARCH'] and do_exact_search:
# we need to eliminate the false positives
idxphrase_table_washed = wash_table_column_name("idxPHRASE%02dR" % index_id)
not_exact_search = intbitset()
for recid in result_set:
res = run_sql("SELECT termlist FROM %s WHERE id_bibrec %s" %(idxphrase_table_washed, '=%s'), (recid, )) #kwalitee:disable=sql
if res:
termlist = deserialize_via_marshal(res[0][0])
if not [term for term in termlist if term.lower().find(p.lower()) > -1]:
not_exact_search.add(recid)
else:
not_exact_search.add(recid)
# remove the recs that are false positives from the final result
result_set.difference_update(not_exact_search)
return result_set
def search_unit_in_idxphrases(p, f, type, wl=0):
"""Searches for phrase 'p' inside idxPHRASE*F table for field 'f' and returns hitset of recIDs found.
The search type is defined by 'type' (e.g. equals to 'r' for a regexp search)."""
# call word search method in some cases:
if f.endswith('count'):
return search_unit_in_bibwords(p, f, wl=wl)
set = intbitset() # will hold output result set
set_used = 0 # not-yet-used flag, to be able to circumvent set operations
limit_reached = 0 # flag for knowing if the query limit has been reached
use_query_limit = False # flag for knowing if to limit the query results or not
# deduce in which idxPHRASE table we will search:
idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
if f:
index_id = get_index_id_from_field(f)
if index_id:
idxphraseX = "idxPHRASE%02dF" % index_id
else:
return intbitset() # phrase index f does not exist
# detect query type (exact phrase, partial phrase, regexp):
if type == 'r':
query_addons = "REGEXP %s"
query_params = (p,)
use_query_limit = True
else:
p = string.replace(p, '*', '%') # we now use '*' as the truncation character
ps = string.split(p, "->", 1) # check for span query:
if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
query_addons = "BETWEEN %s AND %s"
query_params = (ps[0], ps[1])
use_query_limit = True
else:
if string.find(p, '%') > -1:
query_addons = "LIKE %s"
query_params = (p,)
use_query_limit = True
else:
query_addons = "= %s"
query_params = (p,)
# special washing for fuzzy author index:
if f in ('author', 'firstauthor', 'exactauthor', 'exactfirstauthor', 'authorityauthor'):
query_params_washed = ()
for query_param in query_params:
query_params_washed += (wash_author_name(query_param),)
query_params = query_params_washed
# perform search:
if use_query_limit:
try:
res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons),
query_params, wildcard_limit=wl)
except InvenioDbQueryWildcardLimitError, excp:
res = excp.res
limit_reached = 1 # set the limit reached flag to true
else:
res = run_sql("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons), query_params)
# fill the result set:
for word, hitlist in res:
hitset_bibphrase = intbitset(hitlist)
# add the results:
if set_used:
set.union_update(hitset_bibphrase)
else:
set = hitset_bibphrase
set_used = 1
#check to see if the query limit was reached
if limit_reached:
#raise an exception, so we can print a nice message to the user
raise InvenioWebSearchWildcardLimitError(set)
# okay, return result set:
return set
def search_unit_in_bibxxx(p, f, type, wl=0):
"""Searches for pattern 'p' inside bibxxx tables for field 'f' and returns hitset of recIDs found.
The search type is defined by 'type' (e.g. equals to 'r' for a regexp search)."""
# call word search method in some cases:
if f == 'journal' or f.endswith('count'):
return search_unit_in_bibwords(p, f, wl=wl)
p_orig = p # saving for eventual future 'no match' reporting
limit_reached = 0 # flag for knowing if the query limit has been reached
use_query_limit = False # flag for knowing if to limit the query results or not
query_addons = "" # will hold additional SQL code for the query
query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument)
# wash arguments:
f = string.replace(f, '*', '%') # replace truncation char '*' in field definition
if type == 'r':
query_addons = "REGEXP %s"
query_params = (p,)
use_query_limit = True
else:
p = string.replace(p, '*', '%') # we now use '*' as the truncation character
ps = string.split(p, "->", 1) # check for span query:
if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
query_addons = "BETWEEN %s AND %s"
query_params = (ps[0], ps[1])
use_query_limit = True
else:
if string.find(p, '%') > -1:
query_addons = "LIKE %s"
query_params = (p,)
use_query_limit = True
else:
query_addons = "= %s"
query_params = (p,)
# construct 'tl' which defines the tag list (MARC tags) to search in:
tl = []
if len(f) >= 2 and str(f[0]).isdigit() and str(f[1]).isdigit():
tl.append(f) # 'f' seems to be okay as it starts by two digits
else:
# deduce desired MARC tags on the basis of chosen 'f'
tl = get_field_tags(f)
if not tl:
# f index does not exist, nevermind
pass
# okay, start search:
l = [] # will hold list of recID that matched
for t in tl:
# deduce into which bibxxx table we will search:
digit1, digit2 = int(t[0]), int(t[1])
bx = "bib%d%dx" % (digit1, digit2)
bibx = "bibrec_bib%d%dx" % (digit1, digit2)
# construct and run query:
if t == "001":
if query_addons.find('BETWEEN') > -1 or query_addons.find('=') > -1:
# verify that the params are integers (to avoid returning record 123 when searching for 123foo)
try:
query_params = tuple(int(param) for param in query_params)
except ValueError:
return intbitset()
if use_query_limit:
try:
res = run_sql_with_limit("SELECT id FROM bibrec WHERE id %s" % query_addons,
query_params, wildcard_limit=wl)
except InvenioDbQueryWildcardLimitError, excp:
res = excp.res
limit_reached = 1 # set the limit reached flag to true
else:
res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons,
query_params)
else:
query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \
(bx, bibx, query_addons)
if len(t) != 6 or t[-1:]=='%':
# wildcard query, or only the beginning of field 't'
# is defined, so add wildcard character:
query += " AND bx.tag LIKE %s"
query_params_and_tag = query_params + (t + '%',)
else:
# exact query for 't':
query += " AND bx.tag=%s"
query_params_and_tag = query_params + (t,)
if use_query_limit:
try:
res = run_sql_with_limit(query, query_params_and_tag, wildcard_limit=wl)
except InvenioDbQueryWildcardLimitError, excp:
res = excp.res
limit_reached = 1 # set the limit reached flag to true
else:
res = run_sql(query, query_params_and_tag)
# fill the result set:
for id_bibrec in res:
if id_bibrec[0]:
l.append(id_bibrec[0])
# check no of hits found:
nb_hits = len(l)
# okay, return result set:
set = intbitset(l)
#check to see if the query limit was reached
if limit_reached:
#raise an exception, so we can print a nice message to the user
raise InvenioWebSearchWildcardLimitError(set)
return set
def search_unit_in_solr(p, f=None, m=None):
    """
    Query a Solr index and return an intbitset corresponding
    to the result. Parameters (p,f,m) are usual search unit ones.
    """
    # phrase ('a') and regexp ('r') queries are sent to Solr quoted:
    if m in ('a', 'r'):
        if p.startswith('%') and p.endswith('%'):
            # partial phrase: strip the surrounding wildcards first
            p = p[1:-1]
        p = '"%s"' % p
    return solr_get_bitset(f, p)
def search_unit_in_xapian(p, f=None, m=None):
    """
    Query a Xapian index and return an intbitset corresponding
    to the result. Parameters (p,f,m) are usual search unit ones.
    """
    # phrase ('a') and regexp ('r') queries are sent to Xapian quoted:
    if m in ('a', 'r'):
        if p.startswith('%') and p.endswith('%'):
            # partial phrase: strip the surrounding wildcards first
            p = p[1:-1]
        p = '"%s"' % p
    return xapian_get_bitset(f, p)
def search_unit_in_bibrec(datetext1, datetext2, type='c'):
    """
    Return hitset of recIDs found that were either created or modified
    (according to 'type' arg being 'c' or 'm') from datetext1 until datetext2, inclusive.
    Does not pay attention to pattern, collection, anything. Useful
    to intersect later on with the 'real' query.
    """
    hits = intbitset()
    # choose which bibrec date column to query:
    if type and type.startswith("m"):
        column = "modification_date"
    else:
        column = "creation_date"  # by default we search creation dates
    # a span passed as a single "a->b" value is split here:
    parts = datetext1.split('->')
    if len(parts) > 1 and datetext1 == datetext2:
        datetext1, datetext2 = parts[0], parts[1]
    if datetext1 == datetext2:
        # single date: prefix match, so e.g. "2004" covers the whole year
        res = run_sql("SELECT id FROM bibrec WHERE %s LIKE %%s" % (column,),
                      (datetext1 + '%',))
    else:
        # date interval, inclusive on both ends:
        res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (column, column),
                      (datetext1, datetext2))
    for row in res:
        hits += row[0]
    return hits
def search_unit_by_times_cited(p):
    """
    Return hitset of recIDs found that are cited P times.
    Usually P looks like '10->23'.

    Fix: P may arrive as an integer (the function's own 'p == 0' test
    anticipates that), but the old code crashed on ints: '"'+p+'"'
    raised TypeError and p.startswith() raised AttributeError.  We now
    format/convert via str() so both int and string P work.
    """
    numstr = '"%s"' % p
    #this is sort of stupid but since we may need to
    #get the records that do _not_ have cites, we have to
    #know the ids of all records, too
    #but this is needed only if bsu_p is 0 or "0" or "0->N" or "N->0"
    allrecs = []
    if p == 0 or p == "0" or \
       str(p).startswith("0->") or str(p).endswith("->0"):
        allrecs = intbitset(run_sql("SELECT id FROM bibrec"))
    return get_records_with_num_cites(numstr, allrecs)
def search_unit_refersto(query):
    """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records referred to by these records.
    """
    # guard clauses: empty query or empty intermediate hitset
    # both yield an empty result
    if not query:
        return intbitset([])
    ahitset = search_pattern(p=query)
    if not ahitset:
        return intbitset([])
    return get_refersto_hitset(ahitset)
def search_unit_citedby(query):
    """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records cited by these records.
    """
    # guard clauses: empty query or empty intermediate hitset
    # both yield an empty result
    if not query:
        return intbitset([])
    ahitset = search_pattern(p=query)
    if not ahitset:
        return intbitset([])
    return get_citedby_hitset(ahitset)
def search_unit_collection(query, m, wl=None):
    """
    Search for records satisfying the query (e.g. collection:"BOOK" or
    collection:"Books") and return list of records in the collection.
    """
    if not len(query):
        return intbitset([])
    ahitset = get_collection_reclist(query)
    if ahitset:
        return ahitset
    # not a known collection name: fall back to the word index
    return search_unit_in_bibwords(query, 'collection', m, wl=wl)
def get_records_that_can_be_displayed(user_info,
                                      hitset_in_any_collection,
                                      current_coll=CFG_SITE_NAME,
                                      colls=None):
    """
    Return the subset of hitset_in_any_collection that the user may see,
    honouring the restricted-collection viewing policy
    (CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY: 'ANY' vs. all-must-permit).

    :param user_info: user_info dict; only the precached list of
        permitted restricted collections is read from it
    :param hitset_in_any_collection: intbitset of candidate recIDs
    :param current_coll: collection the user searched from
    :param colls: collections to display; defaults to [current_coll]
    :return: intbitset of displayable recIDs
    """
    records_that_can_be_displayed = intbitset()
    if colls is None:
        colls = [current_coll]
    # let's get the restricted collections the user has rights to view
    permitted_restricted_collections = user_info.get('precached_permitted_restricted_collections', [])
    policy = CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY.strip().upper()
    current_coll_children = get_collection_allchildren(current_coll) # real & virtual
    # add all restricted collections, that the user has access to, and are under the current collection
    # do not use set here, in order to maintain a specific order:
    # children of 'cc' (real, virtual, restricted), rest of 'c' that are not cc's children
    colls_to_be_displayed = [coll for coll in current_coll_children if coll in colls or coll in permitted_restricted_collections]
    colls_to_be_displayed.extend([coll for coll in colls if coll not in colls_to_be_displayed])
    if policy == 'ANY':# the user needs to have access to at least one collection that restricts the records
        #we need this to be able to remove records that are both in a public and restricted collection
        permitted_recids = intbitset()
        notpermitted_recids = intbitset()
        for collection in restricted_collection_cache.cache:
            if collection in permitted_restricted_collections:
                permitted_recids |= get_collection_reclist(collection)
            else:
                notpermitted_recids |= get_collection_reclist(collection)
        # keep a record unless it appears ONLY in collections the user may not view:
        records_that_can_be_displayed = hitset_in_any_collection - (notpermitted_recids - permitted_recids)
    else:# the user needs to have access to all collections that restrict a records
        notpermitted_recids = intbitset()
        for collection in restricted_collection_cache.cache:
            if collection not in permitted_restricted_collections:
                notpermitted_recids |= get_collection_reclist(collection)
        # drop any record appearing in a collection the user may not view:
        records_that_can_be_displayed = hitset_in_any_collection - notpermitted_recids
    if records_that_can_be_displayed.is_infinite():
        # We should not return infinite results for user.
        # (set arithmetic above can produce an infinite intbitset);
        # fall back to the explicit union of displayable collections:
        records_that_can_be_displayed = intbitset()
        for coll in colls_to_be_displayed:
            records_that_can_be_displayed |= get_collection_reclist(coll)
    return records_that_can_be_displayed
def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True):
    """Return dict of hitsets given by intersection of hitset with the collection universes.

    Keys of the returned dict are collection names from 'colls'
    (plus permitted restricted children of the current collection);
    values are intbitsets of displayable recIDs.  For HTML output
    ('of' starting with 'h') warnings may be written to 'req' when
    nothing is found.
    """
    _ = gettext_set_language(ln)
    # search stage 4: intersect with the collection universe
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    results = {}  # all final results
    results_nbhits = 0
    # calculate the list of recids (restricted or not) that the user has rights to access and we should display (only those)
    if not req or isinstance(req, cStringIO.OutputType): # called from CLI
        user_info = {}
        # CLI callers see everything: no restriction filtering
        for coll in colls:
            results[coll] = hitset_in_any_collection & get_collection_reclist(coll)
            results_nbhits += len(results[coll])
        records_that_can_be_displayed = hitset_in_any_collection
        permitted_restricted_collections = []
    else:
        user_info = collect_user_info(req)
        # let's get the restricted collections the user has rights to view
        if user_info['guest'] == '1':
            permitted_restricted_collections = []
            ## For guest users that are actually authorized to some restricted
            ## collection (by virtue of the IP address in a FireRole rule)
            ## we explicitly build the list of permitted_restricted_collections
            for coll in colls:
                if collection_restricted_p(coll) and (acc_authorize_action(user_info, 'viewrestrcoll', collection=coll)[0] == 0):
                    permitted_restricted_collections.append(coll)
        else:
            permitted_restricted_collections = user_info.get('precached_permitted_restricted_collections', [])
        # let's build the list of the both public and restricted
        # child collections of the collection from which the user
        # started his/her search. This list of children colls will be
        # used in the warning proposing a search in that collections
        try:
            current_coll = req.argd['cc'] # current_coll: coll from which user started his/her search
        except:
            from flask import request
            current_coll = request.args.get('cc', CFG_SITE_NAME) # current_coll: coll from which user started his/her search
        current_coll_children = get_collection_allchildren(current_coll) # real & virtual
        # add all restricted collections, that the user has access to, and are under the current collection
        # do not use set here, in order to maintain a specific order:
        # children of 'cc' (real, virtual, restricted), rest of 'c' that are not cc's children
        colls_to_be_displayed = [coll for coll in current_coll_children if coll in colls or coll in permitted_restricted_collections]
        colls_to_be_displayed.extend([coll for coll in colls if coll not in colls_to_be_displayed])
        records_that_can_be_displayed = get_records_that_can_be_displayed(
            user_info,
            hitset_in_any_collection,
            current_coll, colls)
        for coll in colls_to_be_displayed:
            # NOTE(review): relies on intbitset.union_update() returning the
            # updated set (unlike builtin set methods) — confirm against the
            # intbitset API before touching this line.
            results[coll] = results.get(coll, intbitset()).union_update(records_that_can_be_displayed & get_collection_reclist(coll))
            results_nbhits += len(results[coll])
    if results_nbhits == 0:
        # no hits found, try to search in Home and restricted and/or hidden collections:
        # NOTE(review): in the CLI branch above 'colls_to_be_displayed' is never
        # set, so reaching the HTML-warning code below from CLI with of="h..."
        # would raise NameError — presumably CLI callers never use of="h...".
        results = {}
        results_in_Home = records_that_can_be_displayed & get_collection_reclist(CFG_SITE_NAME)
        results_in_restricted_collections = intbitset()
        results_in_hidden_collections = intbitset()
        for coll in permitted_restricted_collections:
            if not get_coll_ancestors(coll): # hidden collection
                results_in_hidden_collections.union_update(records_that_can_be_displayed & get_collection_reclist(coll))
            else:
                results_in_restricted_collections.union_update(records_that_can_be_displayed & get_collection_reclist(coll))
        # in this way, we do not count twice, records that are both in Home collection and in a restricted collection
        total_results = len(results_in_Home.union(results_in_restricted_collections))
        if total_results > 0:
            # some hits found in Home and/or restricted collections, so propose this search:
            if of.startswith("h") and display_nearest_terms_box:
                url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[])
                len_colls_to_display = len(colls_to_be_displayed)
                # trim the list of collections to first two, since it might get very large
                write_warning(_("No match found in collection %(x_collection)s. Other collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\
                              {'x_collection': '<em>' + \
                                    string.join([get_coll_i18nname(coll, ln, False) for coll in colls_to_be_displayed[:2]], ', ') + \
                                    (len_colls_to_display > 2 and ' et al' or '') + '</em>',
                               'x_url_open': '<a class="nearestterms" href="%s">' % (url),
                               'x_nb_hits': total_results,
                               'x_url_close': '</a>'}, req=req)
                # display the whole list of collections in an HTML comment
                if len_colls_to_display > 2:
                    write_warning("<!--No match found in collection <em>%(x_collection)s</em>.-->" %\
                                  {'x_collection': string.join([get_coll_i18nname(coll, ln, False) for coll in colls_to_be_displayed], ', ')},
                                  req=req)
        else:
            # no hits found, either user is looking for a document and he/she has not rights
            # or user is looking for a hidden document:
            if of.startswith("h") and display_nearest_terms_box:
                if len(results_in_hidden_collections) > 0:
                    write_warning(_("No public collection matched your query. "
                                    "If you were looking for a hidden document, please type "
                                    "the correct URL for this record."), req=req)
                else:
                    write_warning(_("No public collection matched your query. "
                                    "If you were looking for a non-public document, please choose "
                                    "the desired restricted collection first."), req=req)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        write_warning("Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits, req=req)
        write_warning("Search stage 4: execution took %.2f seconds." % (t2 - t1), req=req)
    return results
def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"):
    """Return intersection of search 'results' (a dict of hitsets
    with collection as key) with the 'hitset', i.e. apply
    'hitset' intersection to each collection within search
    'results'.

    If the final set is to be empty, and 'ap'
    (approximate pattern) is true, and then print the `warningtext'
    and return the original 'results' set unchanged. If 'ap' is
    false, then return empty results set.
    """
    # what to fall back to if the intersection turns out empty everywhere:
    if ap:
        fallback = copy.deepcopy(results)
    else:
        fallback = {}  # empty dict signals "no hits found"
    final_results = {}
    total_hits = 0
    for coll in results.keys():
        final_results[coll] = results[coll].intersection(hitset)
        total_hits += len(final_results[coll])
    if total_hits == 0:
        if of.startswith("h"):
            write_warning(aptext, req=req)
        final_results = fallback
    return final_results
def create_similarly_named_authors_link_box(author_name, ln=CFG_SITE_LANG):
    """Return a box similar to ``Not satisfied...'' one by proposing
    author searches for similar names. Namely, take AUTHOR_NAME
    and the first initial of the first name (after comma) and look
    into author index whether authors with e.g. middle names exist.
    Useful mainly for CERN Library that sometimes contains name
    forms like Ellis-N, Ellis-Nick, Ellis-Nicolas all denoting the
    same person.  The box isn't proposed if no similarly named
    authors are found to exist.
    """
    # return nothing if not configured:
    if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0:
        return ""
    # return empty box if there is no initial:
    if re.match(r'[^ ,]+, [^ ]', author_name) is None:
        return ""
    # firstly find name comma initial, i.e. reduce "Ellis, Nick" to "Ellis, N":
    author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name)
    # secondly search for similar name forms:
    similar_author_names = {}  # used as an ordered-less set of name strings
    for name in author_name_to_search, strip_accents(author_name_to_search):
        for tag in get_field_tags("author"):
            # deduce into which bibxxx table we will search:
            digit1, digit2 = int(tag[0]), int(tag[1])
            bx = "bib%d%dx" % (digit1, digit2)
            bibx = "bibrec_bib%d%dx" % (digit1, digit2)
            if len(tag) != 6 or tag[-1:] == '%':
                # only the beginning of field 't' is defined, so add wildcard character:
                res = run_sql("""SELECT bx.value FROM %s AS bx
                                  WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx,
                              (name + "%", tag + "%"))
            else:
                res = run_sql("""SELECT bx.value FROM %s AS bx
                                  WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx,
                              (name + "%", tag))
            for row in res:
                similar_author_names[row[0]] = 1
    # remove the original name and sort the list:
    try:
        del similar_author_names[author_name]
    except KeyError:
        pass
    # thirdly print the box:
    out = ""
    if similar_author_names:
        out_authors = similar_author_names.keys()
        out_authors.sort()
        # keep only name forms that actually have hits in the author index:
        tmp_authors = []
        for out_author in out_authors:
            nbhits = get_nbhits_in_bibxxx(out_author, "author")
            if nbhits:
                tmp_authors.append((out_author, nbhits))
        out += websearch_templates.tmpl_similar_author_names(
            authors=tmp_authors, ln=ln)
    return out
def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True):
    """Return text box containing list of 'n' nearest terms above/below 'p'
    for the field 'f' for matching type 't' (words/phrases) in
    language 'ln'.
    Propose new searches according to `urlargs' with the new words.
    If `intro_text_p' is true, then display the introductory message,
    otherwise print only the nearest terms in the box content.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    if not CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS:
        return _("Your search did not match any records. Please try again.")
    nearest_terms = []
    if not p: # sanity check
        p = "."
    if p.startswith('%') and p.endswith('%'):
        p = p[1:-1] # fix for partial phrase
    index_id = get_index_id_from_field(f)
    if f == 'fulltext':
        if CFG_SOLR_URL:
            return _("No match found, please enter different search terms.")
        else:
            # FIXME: workaround for not having native phrase index yet
            t = 'w'
    # special indexes:
    if f == 'refersto':
        return _("There are no records referring to %s.") % cgi.escape(p)
    if f == 'citedby':
        return _("There are no records cited by %s.") % cgi.escape(p)
    # look for nearest terms:
    if t == 'w':
        # word index browse:
        nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n)
        if not nearest_terms:
            return _("No word index is available for %s.") % \
                   ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')
    else:
        # phrase browse: try idxPHRASE, then bibrec dates, then bibxxx:
        nearest_terms = []
        if index_id:
            nearest_terms = get_nearest_terms_in_idxphrase(p, index_id, n, n)
        if f == 'datecreated' or f == 'datemodified':
            nearest_terms = get_nearest_terms_in_bibrec(p, f, n, n)
        if not nearest_terms:
            nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n)
        if not nearest_terms:
            return _("No phrase index is available for %s.") % \
                   ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')
    # build (term, hit count, proposed-search args) triples for the template:
    terminfo = []
    for term in nearest_terms:
        if t == 'w':
            hits = get_nbhits_in_bibwords(term, f)
        else:
            if index_id:
                hits = get_nbhits_in_idxphrases(term, f)
            elif f == 'datecreated' or f == 'datemodified':
                hits = get_nbhits_in_bibrec(term, f)
            else:
                hits = get_nbhits_in_bibxxx(term, f)
        argd = {}
        argd.update(urlargd)
        # check which fields contained the requested parameter, and replace it;
        # only the FIRST matching p-field is replaced (note the break below).
        for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'):
            if px in argd:
                argd_px = argd[px]
                if t == 'w':
                    # p was stripped of accents, so do the same here:
                    argd_px = strip_accents(argd_px)
                #argd[px] = string.replace(argd_px, p, term, 1)
                #we need something similar, but case insensitive
                pattern_index = string.find(argd_px.lower(), p.lower())
                if pattern_index > -1:
                    argd[px] = argd_px[:pattern_index] + term + argd_px[pattern_index+len(p):]
                    break
                #this is doing exactly the same as:
                #argd[px] = re.sub('(?i)' + re.escape(p), term, argd_px, 1)
                #but is ~4x faster (2us vs. 8.25us)
        terminfo.append((term, hits, argd))
    intro = ""
    if intro_text_p: # add full leading introductory text
        if f:
            intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \
                     {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>",
                      'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + "</em>"}
        else:
            intro = _("Search term %s did not match any record. Nearest terms in any collection are:") % \
                     ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>")
    return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo,
                                                     intro=intro)
def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
    """Return list of +n -n nearest terms to word `p' in index for field `f'."""
    # deduce into which idxWORD table we will look:
    bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if not index_id:
            return []  # no word index exists for this field
        bibwordsX = "idxWORD%02dF" % index_id
    # terms strictly above `p' (fetched descending, then reversed so the
    # closest one ends up adjacent to `p'):
    above = [row[0] for row in
             run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX,
                     (p, n_above))]
    above.reverse()
    # terms strictly below `p':
    below = [row[0] for row in
             run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX,
                     (p, n_below))]
    # the word itself sits in the middle:
    return above + [p] + below
def get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
    for the given pattern p in the given field idxPHRASE table,
    regardless of collection.
    Return list of [phrase1, phrase2, ... , phrase_n]."""
    if CFG_INSPIRE_SITE and index_id in (3, 15): # FIXME: workaround due to new fuzzy index
        return [p]
    idxphraseX = "idxPHRASE%02dF" % index_id
    # phrases strictly above p, reversed so the closest is last:
    above = [row[0] for row in
             run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above))]
    above.reverse()
    # phrases at or below p:
    below = [row[0] for row in
             run_sql("SELECT term FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below))]
    return above + below
def get_nearest_terms_in_idxphrase_with_collection(p, index_id, n_below, n_above, collection):
    """Browse (-n_above, +n_below) closest bibliographic phrases
    for the given pattern p in the given field idxPHRASE table,
    considering the collection (intbitset).
    Return list of [(phrase1, hitset), (phrase2, hitset), ... , (phrase_n, hitset)]."""
    idxphraseX = "idxPHRASE%02dF" % index_id

    def _browse(sql_template, limit):
        # fetch 3x the requested rows, since rows whose hitlist does not
        # intersect the collection are dropped; return (term, nb_hits) pairs
        rows = run_sql(sql_template % idxphraseX, (p, limit * 3))
        kept = [(term, intbitset(hitlist) & collection) for term, hitlist in rows]
        return [(term, len(hits)) for term, hits in kept if hits]

    res_above = _browse("SELECT term,hitlist FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s", n_above)
    res_below = _browse("SELECT term,hitlist FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s", n_below)
    res_above.reverse()
    return res_above[-n_above:] + res_below[:n_below]
def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
    for the given pattern p in the given field f, regardless
    of collection.
    Return list of [phrase1, phrase2, ... , phrase_n]."""
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = string.split(p, ":", 1)
    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nearest_terms_in_bibwords(p, f, n_below, n_above)
    ## We are going to take max(n_below, n_above) as the number of
    ## values to fetch from bibXXx.  This is needed to work around
    ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
    ## use MySQL 4.1.x or our own idxPHRASE in the future.
    index_id = get_index_id_from_field(f)
    if index_id:
        # a dedicated phrase index exists, prefer it over bibxxx:
        return get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above)
    n_fetch = 2*max(n_below, n_above)
    ## construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
    ## start browsing to fetch list of hits:
    browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique)
    # always add self to the results set (with partial-phrase wildcards stripped):
    browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        # firstly try to get `n' closest phrases above `p':
        if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value<%%s AND bx.tag LIKE %%s
                              ORDER BY bx.value DESC LIMIT %%s""" % bx,
                          (p, t + "%", n_fetch))
        else:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value<%%s AND bx.tag=%%s
                              ORDER BY bx.value DESC LIMIT %%s""" % bx,
                          (p, t, n_fetch))
        for row in res:
            browsed_phrases[row[0]] = 1
        # secondly try to get `n' closest phrases equal to or below `p':
        if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value>=%%s AND bx.tag LIKE %%s
                              ORDER BY bx.value ASC LIMIT %%s""" % bx,
                          (p, t + "%", n_fetch))
        else:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value>=%%s AND bx.tag=%%s
                              ORDER BY bx.value ASC LIMIT %%s""" % bx,
                          (p, t, n_fetch))
        for row in res:
            browsed_phrases[row[0]] = 1
    # select first n words only: (this is needed as we were searching
    # in many different tables and so aren't sure we have more than n
    # words right; this of course won't be needed when we shall have
    # one ACC table only for given field):
    phrases_out = browsed_phrases.keys()
    # accent/case-insensitive ordering (Python 2 cmp-style sort):
    phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)),
                                      string.lower(strip_accents(y))))
    # find position of self:
    try:
        idx_p = phrases_out.index(p)
    except:
        # p itself not present (e.g. it was wildcard-stripped above):
        # fall back to the middle of the list
        idx_p = len(phrases_out)/2
    # return n_above and n_below:
    return phrases_out[max(0, idx_p-n_above):idx_p+n_below]
def get_nearest_terms_in_bibrec(p, f, n_below, n_above):
    """Return list of nearest terms from the bibrec table.

    p is usually a date, and f either 'datecreated' or 'datemodified'.
    Note: the below/above counts are approximative and not strictly
    respected.
    """
    # map the virtual field name onto the actual bibrec column:
    col = 'creation_date'
    if f == 'datemodified':
        col = 'modification_date'
    rows_above = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s')
                           FROM bibrec WHERE %s < %%s
                           ORDER BY %s DESC LIMIT %%s""" % (col, col, col),
                         (p, n_above))
    rows_below = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s')
                           FROM bibrec WHERE %s > %%s
                           ORDER BY %s ASC LIMIT %%s""" % (col, col, col),
                         (p, n_below))
    # merge both result sets, dropping duplicates, and return them sorted:
    dates = set()
    for row in rows_above:
        dates.add(row[0])
    for row in rows_below:
        dates.add(row[0])
    return sorted(dates)
def get_nbhits_in_bibrec(term, f):
    """Return number of hits in the bibrec table.

    term is usually a date, and f is either 'datecreated' or
    'datemodified'.
    """
    # pick the bibrec column that corresponds to the virtual field name:
    if f == 'datemodified':
        col = 'modification_date'
    else:
        col = 'creation_date'
    rows = run_sql("SELECT COUNT(*) FROM bibrec WHERE %s LIKE %%s" % (col,),
                   (term + '%',))
    return rows[0][0]
def get_nbhits_in_bibwords(word, f):
    """Return number of hits for 'word' inside the word index for field 'f'."""
    # default to the 'anyfield' word index table:
    table = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if not index_id:
            # unknown field: there is no word index to consult
            return 0
        table = "idxWORD%02dF" % index_id
    if not word:
        return 0
    # sum up the sizes of all hitlists stored for this term:
    nbhits = 0
    rows = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % table, (word,))
    for row in rows:
        nbhits += len(intbitset(row[0]))
    return nbhits
def get_nbhits_in_idxphrases(word, f):
    """Return number of hits for 'word' inside the phrase index for field 'f'."""
    # default to the 'anyfield' phrase index table:
    table = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if not index_id:
            # unknown field: there is no phrase index to consult
            return 0
        table = "idxPHRASE%02dF" % index_id
    if not word:
        return 0
    # sum up the sizes of all hitlists stored for this term:
    nbhits = 0
    rows = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % table, (word,))
    for row in rows:
        nbhits += len(intbitset(row[0]))
    return nbhits
def get_nbhits_in_bibxxx(p, f, in_hitset=None):
    """Return number of hits for phrase 'p' inside the bibxxx tables for field 'f'."""
    ## determine browse field: split "field:phrase" when no field was given
    if not f and p.find(":") > 0:
        f, p = p.split(":", 1)
    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nbhits_in_bibwords(p, f)
    ## construct the list of MARC tags to search in:
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        # 'f' already looks like a MARC tag (two leading digits)
        tag_list = [f]
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tag_list = get_field_tags(f)
    # collect matching record IDs (dict used as a set for uniqueness):
    matching_recids = {}
    for tag in tag_list:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(tag[0]), int(tag[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        if len(tag) != 6 or tag[-1:] == '%':
            # only the beginning of 'tag' is defined, so add wildcard character:
            rows = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                              WHERE bx.value=%%s AND bx.tag LIKE %%s
                              AND bibx.id_bibxxx=bx.id""" % (bibx, bx),
                           (p, tag + "%"))
        else:
            rows = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx
                              WHERE bx.value=%%s AND bx.tag=%%s
                              AND bibx.id_bibxxx=bx.id""" % (bibx, bx),
                           (p, tag))
        for row in rows:
            matching_recids[row[0]] = 1
    if in_hitset is None:
        return len(matching_recids)
    return len(intbitset(matching_recids.keys()).intersection(in_hitset))
def get_mysql_recid_from_aleph_sysno(sysno):
    """Return DB's recID for the ALEPH sysno passed in the argument
    (e.g. "002379334CER").  Return None in case of failure."""
    rows = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b
                      WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""",
                   (sysno,))
    if rows:
        return rows[0][0]
    return None
def guess_primary_collection_of_a_record(recID):
    """Return primary collection name a record recid belongs to, by
    testing 980 identifier.
    May lead to bad guesses when a collection is defined dynamically
    via dbquery.
    In that case, return 'CFG_SITE_NAME'.

    @param recID: record identifier
    @return: collection name (str); falls back to CFG_SITE_NAME when
        no collection dbquery matches any of the record's 980__a values
    """
    out = CFG_SITE_NAME
    dbcollids = get_fieldvalues(recID, "980__a")
    for dbcollid in dbcollids:
        # try the usual spellings a collection dbquery may use to
        # reference this 980__a value, quoted and unquoted:
        variants = ("collection:" + dbcollid,
                    'collection:"' + dbcollid + '"',
                    "980__a:" + dbcollid,
                    '980__a:"' + dbcollid + '"',
                    '980:' + dbcollid ,
                    '980:"' + dbcollid + '"')
        res = run_sql("SELECT name FROM collection WHERE dbquery IN (%s,%s,%s,%s,%s,%s)", variants)
        if res:
            # first match wins; stop at the first 980__a value that maps
            # to a known collection
            out = res[0][0]
            break
    if CFG_CERN_SITE:
        recID = int(recID)
        # dirty hack for ATLAS collections at CERN:
        if out in ('ATLAS Communications', 'ATLAS Internal Notes'):
            for alternative_collection in ('ATLAS Communications Physics',
                                           'ATLAS Communications General',
                                           'ATLAS Internal Notes Physics',
                                           'ATLAS Internal Notes General',):
                if recID in get_collection_reclist(alternative_collection):
                    return alternative_collection
        # dirty hack for FP (Finance/Procurement collections):
        FP_collections = {'DO': ['Current Price Enquiries', 'Archived Price Enquiries'],
                          'IT': ['Current Invitation for Tenders', 'Archived Invitation for Tenders'],
                          'MS': ['Current Market Surveys', 'Archived Market Surveys']}
        fp_coll_ids = [coll for coll in dbcollids if coll in FP_collections]
        for coll in fp_coll_ids:
            for coll_name in FP_collections[coll]:
                if recID in get_collection_reclist(coll_name):
                    return coll_name
    return out
# matches /collection/<name> paths so the collection name can be extracted:
_re_collection_url = re.compile('/collection/(.+)')

def guess_collection_of_a_record(recID, referer=None, recreate_cache_if_needed=True):
    """Return collection name a record recid belongs to, by first testing
    the referer URL if provided and otherwise returning the
    primary collection.

    @param recID: record identifier
    @param referer: referer URL to inspect; a /collection/<name> or
        /search?...cc=...&c=... URL on this site may pin the collection
    @param recreate_cache_if_needed: whether the collection-reclist cache
        may be refreshed before lookups
    """
    if referer:
        dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer)
        # requests can come from different invenio installations, with different collections
        if CFG_SITE_URL.find(hostname) < 0:
            return guess_primary_collection_of_a_record(recID)
        g = _re_collection_url.match(path)
        if g:
            name = urllib.unquote_plus(g.group(1))
            # check if this collection actually exists (also normalize the name if case-insensitive)
            name = get_coll_normalised_name(name)
            if name and recID in get_collection_reclist(name):
                return name
        elif path.startswith('/search'):
            if recreate_cache_if_needed:
                collection_reclist_cache.recreate_cache_if_needed()
            query = cgi.parse_qs(query)
            # consider both the 'cc' (current collection) and 'c' URL arguments:
            for name in query.get('cc', []) + query.get('c', []):
                name = get_coll_normalised_name(name)
                if name and recID in get_collection_reclist(name, recreate_cache_if_needed=False):
                    return name
    # fall back to the primary collection guessed from the 980 tag:
    return guess_primary_collection_of_a_record(recID)
def is_record_in_any_collection(recID, recreate_cache_if_needed=True):
    """Return True if the record belongs to at least one collection.

    This is a good, although not perfect, indicator to guess whether
    webcoll has already run after this record was entered into the system.
    """
    if recreate_cache_if_needed:
        collection_reclist_cache.recreate_cache_if_needed()
    # any() short-circuits on the first collection containing the record:
    return any(recID in get_collection_reclist(name, recreate_cache_if_needed=False)
               for name in collection_reclist_cache.cache.keys())
def get_all_collections_of_a_record(recID, recreate_cache_if_needed=True):
    """Return all the collection names a record belongs to.

    Note this function is O(n_collections).
    """
    if recreate_cache_if_needed:
        collection_reclist_cache.recreate_cache_if_needed()
    return [name for name in collection_reclist_cache.cache.keys()
            if recID in get_collection_reclist(name, recreate_cache_if_needed=False)]
def get_tag_name(tag_value, prolog="", epilog=""):
    """Return tag name from the known tag value, by looking up the 'tag' table.

    Return empty string in case of failure.
    Example: input='100__%', output='first author'.
    """
    rows = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,))
    if not rows:
        return ""
    return prolog + rows[0][0] + epilog
def get_fieldcodes():
    """Return a list of field codes that may have been passed as 'search options' in URL.

    Example: output=['subject', 'division'].
    """
    return [row[0] for row in run_sql("SELECT DISTINCT(code) FROM field")]
def get_field_name(code):
    """Return the corresponding field name given the field code.

    e.g. reportnumber -> report number.  Returns "" for unknown codes.
    """
    rows = run_sql("SELECT name FROM field WHERE code=%s", (code, ))
    if rows:
        return rows[0][0]
    return ""
def get_field_tags(field):
    """Return a list of MARC tags for the field code 'field'.

    Return empty list in case of error.
    Example: field='author', output=['100__%', '700__%'].
    """
    query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f
               WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag
               ORDER BY ft.score DESC"""
    # tags come back ordered by descending score, most relevant first:
    return [row[0] for row in run_sql(query, (field, ))]
def get_merged_recid(recID):
    """ Return the record ID of the record with
    which the given record has been merged.
    @param recID: deleted record recID
    @type recID: int
    @return: merged record recID
    @rtype: int or None
    """
    # 970__d holds the recid of the merge target; take the first value
    # that parses as an integer:
    for val in get_fieldvalues(recID, "970__d"):
        try:
            return int(val)
        except ValueError:
            continue
    return None
def record_exists(recID):
    """Return 1 if record RECID exists.
    Return 0 if it doesn't exist.
    Return -1 if it exists but is marked as deleted.
    """
    res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1)
    if not res:
        return 0
    try:
        recID = int(recID)
    except ValueError:
        # if recid is '123foo', mysql will return id=123, and we don't want that
        return 0
    # record exists; now check whether it isn't marked as deleted:
    dbcollids = get_fieldvalues(recID, "980__%")
    if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids):
        return -1  # exists, but marked as deleted
    return 1  # exists fine
def record_empty(recID):
    """
    Is this record empty, e.g. has only 001, waiting for integration?

    @param recID: the record identifier.
    @type recID: int
    @return: 1 if the record is empty, 0 otherwise.
    @rtype: int
    """
    rec = get_record(recID)
    # a record with fewer than two fields carries no real content yet:
    if rec is None or len(rec) < 2:
        return 1
    return 0
def record_public_p(recID, recreate_cache_if_needed=True):
    """Return whether the record is public, i.e. whether it can be found
    in the Home collection.
    """
    home_reclist = get_collection_reclist(CFG_SITE_NAME,
                                          recreate_cache_if_needed=recreate_cache_if_needed)
    return recID in home_reclist
def get_creation_date(recID, fmt="%Y-%m-%d"):
    """Return the creation date of record 'recID' formatted according to
    'fmt', or "" when the record does not exist."""
    rows = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
    if not rows:
        return ""
    return rows[0][0]
def get_modification_date(recID, fmt="%Y-%m-%d"):
    """Return the date of last modification of record 'recID' formatted
    according to 'fmt', or "" when the record does not exist."""
    rows = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1)
    if not rows:
        return ""
    return rows[0][0]
def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS,
                      aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="",
                      sc=1, pl_in_url="",
                      d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="",
                      cpu_time=-1, middle_only=0, em=""):
    """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
    Also, prints navigation links (beg/next/prev/end) inside the results set.
    If middle_only is set to 1, it will only print the middle box information (beg/next/prev/end/etc) links.
    This is suitable for displaying navigation links at the bottom of the search results page.

    The many parameters are the standard WebSearch URL arguments (search
    pattern 'p'/'p1..p3', fields 'f'/'f1..f3', matching types 'm1..m3',
    boolean operators 'op1..op2', sort field/order/pattern 'sf'/'so'/'sp',
    ranking method 'rm', output format 'of', output tags 'ot', date
    constraints 'd1*'/'d2*'/'dt', split-collection flag 'sc', pattern list
    'pl_in_url', language 'ln'); they are passed through to the template so
    the navigation links can reproduce the current query.
    """
    # honour the 'em' (enabled modules) selector: emit nothing when the
    # search-info stripe was not requested
    if em != '' and EM_REPOSITORY["search_info"] not in em:
        return ""
    # sanity check: keep jrec (index of first displayed record) in range
    if jrec < 1:
        jrec = 1
    if jrec > nb_found:
        jrec = max(nb_found-rg+1, 1)
    return websearch_templates.tmpl_print_search_info(
        ln = ln,
        collection = collection,
        aas = aas,
        collection_name = get_coll_i18nname(collection, ln, False),
        collection_id = get_colID(collection),
        middle_only = middle_only,
        rg = rg,
        nb_found = nb_found,
        sf = sf,
        so = so,
        rm = rm,
        of = of,
        ot = ot,
        p = p,
        f = f,
        p1 = p1,
        p2 = p2,
        p3 = p3,
        f1 = f1,
        f2 = f2,
        f3 = f3,
        m1 = m1,
        m2 = m2,
        m3 = m3,
        op1 = op1,
        op2 = op2,
        pl_in_url = pl_in_url,
        d1y = d1y,
        d1m = d1m,
        d1d = d1d,
        d2y = d2y,
        d2m = d2m,
        d2d = d2d,
        dt = dt,
        jrec = jrec,
        sc = sc,
        sp = sp,
        all_fieldcodes = get_fieldcodes(),
        cpu_time = cpu_time,
    )
def print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS,
                             aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="",
                             sc=1, pl_in_url="",
                             d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="",
                             cpu_time=-1, middle_only=0, em=""):
    """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time.
    Also, prints navigation links (beg/next/prev/end) inside the results set.
    If middle_only is set to 1, it will only print the middle box information (beg/next/prev/end/etc) links.
    This is suitable for displaying navigation links at the bottom of the search results page.

    Hosted-collection variant of print_search_info(): identical parameter
    handling (standard WebSearch URL arguments passed through so navigation
    links can reproduce the current query), but rendering is delegated to
    tmpl_print_hosted_search_info.
    """
    # honour the 'em' (enabled modules) selector: emit nothing when the
    # search-info stripe was not requested
    if em != '' and EM_REPOSITORY["search_info"] not in em:
        return ""
    # sanity check: keep jrec (index of first displayed record) in range
    if jrec < 1:
        jrec = 1
    if jrec > nb_found:
        jrec = max(nb_found-rg+1, 1)
    return websearch_templates.tmpl_print_hosted_search_info(
        ln = ln,
        collection = collection,
        aas = aas,
        collection_name = get_coll_i18nname(collection, ln, False),
        collection_id = get_colID(collection),
        middle_only = middle_only,
        rg = rg,
        nb_found = nb_found,
        sf = sf,
        so = so,
        rm = rm,
        of = of,
        ot = ot,
        p = p,
        f = f,
        p1 = p1,
        p2 = p2,
        p3 = p3,
        f1 = f1,
        f2 = f2,
        f3 = f3,
        m1 = m1,
        m2 = m2,
        m3 = m3,
        op1 = op1,
        op2 = op2,
        pl_in_url = pl_in_url,
        d1y = d1y,
        d1m = d1m,
        d1d = d1d,
        d2y = d2y,
        d2m = d2m,
        d2d = d2d,
        dt = dt,
        jrec = jrec,
        sc = sc,
        sp = sp,
        all_fieldcodes = get_fieldcodes(),
        cpu_time = cpu_time,
    )
def print_results_overview(colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[], hosted_colls_potential_results_p=False, em=""):
    """Print the results overview box with links to particular collections below."""
    # honour the 'em' (enabled modules) selector:
    if em != "" and EM_REPOSITORY["overview"] not in em:
        return ""
    # enrich each collection code with its id and localised name:
    coll_infos = [{'id': get_colID(coll),
                   'code': coll,
                   'name': get_coll_i18nname(coll, ln, False)}
                  for coll in colls]
    return websearch_templates.tmpl_print_results_overview(
        ln = ln,
        results_final_nb_total = results_final_nb_total,
        results_final_nb = results_final_nb,
        cpu_time = cpu_time,
        colls = coll_infos,
        ec = ec,
        hosted_colls_potential_results_p = hosted_colls_potential_results_p,
    )
def print_hosted_results(url_and_engine, ln=CFG_SITE_LANG, of=None, req=None, no_records_found=False, search_timed_out=False, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS, em = ""):
    """Print the full results of a hosted collection.

    @param url_and_engine: the hosted collection (URL, search engine) pair
    @param of: output format code; HTML short-circuit messages are only
        emitted for formats starting with 'h'
    @param no_records_found: when True and output is HTML, return a
        "no results" message instead of rendering
    @param search_timed_out: when True and output is HTML, return a
        "timed out" message instead of rendering
    @param limit: maximum number of hosted results to display
    @param em: enabled-modules selector controlling body/basket display
    """
    # BUGFIX: 'of' defaults to None, so guard before calling startswith()
    # to avoid an AttributeError for callers relying on the default.
    if of and of.startswith("h"):
        if no_records_found:
            return "<br />No results found."
        if search_timed_out:
            return "<br />The search engine did not respond in time."
    return websearch_templates.tmpl_print_hosted_results(
        url_and_engine=url_and_engine,
        ln=ln,
        of=of,
        req=req,
        limit=limit,
        display_body = em == "" or EM_REPOSITORY["body"] in em,
        display_add_to_basket = em == "" or EM_REPOSITORY["basket"] in em)
class BibSortDataCacher(DataCacher):
    """
    Cache holding all structures created by bibsort
    ( _data, data_dict) for one sorting method.
    """
    def __init__(self, method_name):
        """Resolve the bsrMETHOD id for 'method_name' and set up the cache.

        method_id stays 0 when the method is unknown or the database is
        unreachable; the cache filler then yields an empty cache.
        """
        self.method_name = method_name
        self.method_id = 0
        # BUGFIX: 'res' used to be left unbound when run_sql() raised,
        # causing a NameError in the 'if res' test below; also narrowed
        # the bare except clauses to Exception.
        res = None
        try:
            res = run_sql("""SELECT id from bsrMETHOD where name = %s""", (self.method_name,))
        except Exception:
            res = None
        if res and res[0]:
            self.method_id = res[0][0]
        else:
            self.method_id = 0

        def cache_filler():
            """Load the ordered data dict and the hit buckets for this method."""
            method_id = self.method_id
            alldicts = {}
            if self.method_id == 0:
                return {}
            try:
                res_data = run_sql("""SELECT data_dict_ordered from bsrMETHODDATA \
                    where id_bsrMETHOD = %s""", (method_id,))
                res_buckets = run_sql("""SELECT bucket_no, bucket_data from bsrMETHODDATABUCKET\
                    where id_bsrMETHOD = %s""", (method_id,))
            except Exception:
                # database problems, return empty cache
                return {}
            try:
                data_dict_ordered = deserialize_via_marshal(res_data[0][0])
            except Exception:
                # missing or corrupt serialized data: start from scratch
                data_dict_ordered = {}
            alldicts['data_dict_ordered'] = data_dict_ordered # recid: weight
            if not res_buckets:
                alldicts['bucket_data'] = {}
                return alldicts
            for row in res_buckets:
                bucket_no = row[0]
                try:
                    bucket_data = intbitset(row[1])
                except Exception:
                    # corrupt bucket blob: fall back to an empty set
                    bucket_data = intbitset([])
                alldicts.setdefault('bucket_data', {})[bucket_no] = bucket_data
            return alldicts

        def timestamp_verifier():
            """Return the most recent update time over method data and buckets."""
            method_id = self.method_id
            res = run_sql("""SELECT last_updated from bsrMETHODDATA where id_bsrMETHOD = %s""", (method_id,))
            try:
                update_time_methoddata = str(res[0][0])
            except IndexError:
                update_time_methoddata = '1970-01-01 00:00:00'
            res = run_sql("""SELECT max(last_updated) from bsrMETHODDATABUCKET where id_bsrMETHOD = %s""", (method_id,))
            try:
                update_time_buckets = str(res[0][0])
            except IndexError:
                update_time_buckets = '1970-01-01 00:00:00'
            return max(update_time_methoddata, update_time_buckets)

        DataCacher.__init__(self, cache_filler, timestamp_verifier)
def get_sorting_methods():
    """Return {method_name: definition} for every BibSort method that
    already has computed data; {} when buckets are disabled or on DB error."""
    if not CFG_BIBSORT_BUCKETS: # we do not want to use buckets
        return {}
    try: # make sure the method has some data
        res = run_sql("""SELECT m.name, m.definition FROM bsrMETHOD m, bsrMETHODDATA md WHERE m.id = md.id_bsrMETHOD""")
    except Exception:
        # narrowed from a bare except so KeyboardInterrupt/SystemExit
        # are not silently swallowed; DB errors still yield {}
        return {}
    return dict(res)
# Load the available BibSort methods once at import time and keep one
# BibSortDataCacher per method.  The try/except pattern (re)creates a
# cacher whenever it is missing from the dict or cannot report its state.
sorting_methods = get_sorting_methods()
cache_sorted_data = {}
for sorting_method in sorting_methods:
    try:
        cache_sorted_data[sorting_method].is_ok_p
    except Exception:
        cache_sorted_data[sorting_method] = BibSortDataCacher(sorting_method)
def get_tags_from_sort_fields(sort_fields):
    """Return (tags, error_field) for a list of sort fields.

    'tags' is the list of MARC tags associated with the given sort
    fields; 'error_field' names the first field with no associated tags
    (empty string when all fields resolved), so a message can be shown
    to the user.
    """
    if not sort_fields:
        return [], ''
    tags = []
    for sort_field in sort_fields:
        if sort_field and str(sort_field[0:2]).isdigit():
            # starts with two digits: probably a MARC tag already
            tags.append(sort_field)
            continue
        # otherwise consult the 'field' table
        field_tags = get_field_tags(sort_field)
        if not field_tags:
            # unknown sort field: report it back to the caller
            return [], sort_field
        tags.extend(field_tags)
    return tags, ''
def rank_records(req, rank_method_code, rank_limit_relevance, hitset_global, pattern=None, verbose=0, sort_order='d', of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None, field=''):
    """Initial entry point for ranking records; acts like a dispatcher.

    (i)  rank_method_code is in bsrMETHOD: bibsort buckets can be used;
    (ii) rank_method_code is not in bsrMETHOD: use bibrank.
    """
    if CFG_BIBSORT_BUCKETS and sorting_methods:
        wanted = rank_method_code.lower()
        for sort_method in sorting_methods:
            definition = sorting_methods[sort_method]
            # a BibSort ranking method is declared as "RNK: <code>"
            if definition.startswith('RNK') and \
               definition.replace('RNK:', '').strip().lower() == wanted:
                solution_recs, solution_scores = sort_records_bibsort(
                    req, hitset_global, sort_method, '', sort_order,
                    verbose, of, ln, rg, jrec, 'r')
                comment = ''
                if verbose > 0:
                    comment = 'find_citations retlist %s' % \
                        [[rec, score] for rec, score in zip(solution_recs, solution_scores)]
                return (solution_recs, solution_scores, '(', ')', comment)
    # fall back to the classic bibrank ranking:
    return rank_records_bibrank(rank_method_code, rank_limit_relevance,
                                hitset_global, pattern, verbose, field, rg, jrec)
def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None):
    """Initial entry point for sorting records, acts like a dispatcher.
    (i) sort_field is in the bsrMETHOD, and thus, the BibSort has sorted the data for this field, so we can use the cache;
    (ii)sort_field is not in bsrMETHOD, and thus, the cache does not contain any information regarding this sorting method

    @param req: request object (used for warning output)
    @param recIDs: list of record IDs to sort (assumed in reverse display order)
    @param sort_field: comma-separated field codes or MARC tags to sort by
    @param sort_order: 'd' descending or 'a' ascending
    @param sort_pattern: preferential sort pattern (forces bibxxx sorting)
    @param of: output format; warnings are only written for 'h*' formats
    @param rg, jrec: record-group size and first record, bounding how many
        records actually need sorting
    @return: sorted (sub)list of recIDs
    """
    _ = gettext_set_language(ln)
    #we should return sorted records up to irec_max(exclusive)
    dummy, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
    #calculate the min index on the reverted list
    index_min = max(len(recIDs) - irec_max, 0) #just to be sure that the min index is not negative
    #bibsort does not handle sort_pattern for now, use bibxxx
    if sort_pattern:
        return sort_records_bibxxx(req, recIDs, None, sort_field, sort_order, sort_pattern, verbose, of, ln, rg, jrec)
    use_sorting_buckets = True
    if not CFG_BIBSORT_BUCKETS or not sorting_methods: #ignore the use of buckets, use old fashion sorting
        use_sorting_buckets = False
    # no sort field given: fall back to "latest first" via buckets, or no-op
    if not sort_field:
        if use_sorting_buckets:
            return sort_records_bibsort(req, recIDs, 'latest first', sort_field, sort_order, verbose, of, ln, rg, jrec)
        else:
            return recIDs[index_min:]
    sort_fields = string.split(sort_field, ",")
    if len(sort_fields) == 1:
        # we have only one sorting_field, check if it is treated by BibSort
        for sort_method in sorting_methods:
            definition = sorting_methods[sort_method]
            if use_sorting_buckets and \
               ((definition.startswith('FIELD') and \
                 definition.replace('FIELD:','').strip().lower() == string.lower(sort_fields[0])) or \
                sort_method == sort_fields[0]):
                #use BibSort
                return sort_records_bibsort(req, recIDs, sort_method, sort_field, sort_order, verbose, of, ln, rg, jrec)
    #deduce sorting MARC tag out of the 'sort_field' argument:
    tags, error_field = get_tags_from_sort_fields(sort_fields)
    if error_field:
        # unknown sort field: warn (HTML output only) and keep original order
        if use_sorting_buckets:
            return sort_records_bibsort(req, recIDs, 'latest first', sort_field, sort_order, verbose, of, ln, rg, jrec)
        else:
            if of.startswith('h'):
                write_warning(_("Sorry, %s does not seem to be a valid sort option. The records will not be sorted.") % cgi.escape(error_field), "Error", req=req)
            return recIDs[index_min:]
    if tags:
        for sort_method in sorting_methods:
            definition = sorting_methods[sort_method]
            if definition.startswith('MARC') \
                    and definition.replace('MARC:','').strip().split(',') == tags \
                    and use_sorting_buckets:
                #this list of tags have a designated method in BibSort, so use it
                return sort_records_bibsort(req, recIDs, sort_method, sort_field, sort_order, verbose, of, ln, rg, jrec)
        #we do not have this sort_field in BibSort tables -> do the old fashion sorting
        return sort_records_bibxxx(req, recIDs, tags, sort_field, sort_order, sort_pattern, verbose, of, ln, rg, jrec)
    return recIDs[index_min:]
def sort_records_bibsort(req, recIDs, sort_method, sort_field='', sort_order='d', verbose=0, of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None, sort_or_rank = 's'):
    """This function orders the recIDs list, based on a sorting method(sort_field) using the BibSortDataCacher for speed.

    @param sort_method: name of a method in the bsrMETHOD table
    @param sort_or_rank: 's' to sort (returns a list of recIDs) or 'r' to
        rank (returns a (recIDs, scores) pair)
    Falls back to bibxxx sorting (or bibrank ranking) when the method is
    unknown or the buckets are not fully built.
    """
    _ = gettext_set_language(ln)
    #sanity check
    if sort_method not in sorting_methods:
        if sort_or_rank == 'r':
            return rank_records_bibrank(sort_method, 0, recIDs, None, verbose)
        else:
            return sort_records_bibxxx(req, recIDs, None, sort_field, sort_order, '', verbose, of, ln, rg, jrec)
    if verbose >= 3 and of.startswith('h'):
        write_warning("Sorting (using BibSort cache) by method %s (definition %s)." \
                      % (cgi.escape(repr(sort_method)), cgi.escape(repr(sorting_methods[sort_method]))), req=req)
    #we should return sorted records up to irec_max(exclusive)
    dummy, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
    solution = intbitset([])
    input_recids = intbitset(recIDs)
    cache_sorted_data[sort_method].recreate_cache_if_needed()
    sort_cache = cache_sorted_data[sort_method].cache
    bucket_numbers = sort_cache['bucket_data'].keys()
    #check if all buckets have been constructed
    if len(bucket_numbers) != CFG_BIBSORT_BUCKETS:
        if verbose > 3 and of.startswith('h'):
            write_warning("Not all buckets have been constructed.. switching to old fashion sorting.", req=req)
        if sort_or_rank == 'r':
            return rank_records_bibrank(sort_method, 0, recIDs, None, verbose)
        else:
            return sort_records_bibxxx(req, recIDs, None, sort_field, sort_order, '', verbose, of, ln, rg, jrec)
    # walk the buckets (highest first for descending order) until enough
    # of the input records have been covered:
    if sort_order == 'd':
        bucket_numbers.reverse()
    for bucket_no in bucket_numbers:
        solution.union_update(input_recids & sort_cache['bucket_data'][bucket_no])
        if len(solution) >= irec_max:
            break
    dict_solution = {}
    missing_records = []
    for recid in solution:
        try:
            dict_solution[recid] = sort_cache['data_dict_ordered'][recid]
        except KeyError:
            #recid is in buckets, but not in the bsrMETHODDATA,
            #maybe because the value has been deleted, but the change has not yet been propagated to the buckets
            missing_records.append(recid)
    #check if there are recids that are not in any bucket -> to be added at the end/top, ordered by insertion date
    if len(solution) < irec_max:
        #some records have not been yet inserted in the bibsort structures
        #or, some records have no value for the sort_method
        missing_records = sorted(missing_records + list(input_recids.difference(solution)))
    #the records need to be sorted in reverse order for the print record function
    #the return statement should be equivalent with the following statements
    #(these are clearer, but less efficient, since they revert the same list twice)
    #sorted_solution = (missing_records + sorted(dict_solution, key=dict_solution.__getitem__, reverse=sort_order=='d'))[:irec_max]
    #sorted_solution.reverse()
    #return sorted_solution
    if sort_method.strip().lower().startswith('latest') and sort_order == 'd':
        # if we want to sort the records on their insertion date, add the missing records at the top
        solution = sorted(dict_solution, key=dict_solution.__getitem__, reverse=sort_order=='a') + missing_records
    else:
        solution = missing_records + sorted(dict_solution, key=dict_solution.__getitem__, reverse=sort_order=='a')
    #calculate the min index on the reverted list
    index_min = max(len(solution) - irec_max, 0) #just to be sure that the min index is not negative
    #return all the records up to irec_max, but on the reverted list
    if sort_or_rank == 'r':
        # we need the recids, with values
        return (solution[index_min:], [dict_solution.get(record, 0) for record in solution[index_min:]])
    else:
        return solution[index_min:]
def sort_records_bibxxx(req, recIDs, tags, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG, rg=None, jrec=None):
    """OLD FASHION SORTING WITH NO CACHE, for sort fields that are not run in BibSort.

    Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'.
    If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by
    'sort pattern', for example "sort by report number that starts by CERN-PS".
    Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly.

    @param tags: pre-computed list of MARC tags to sort on; when None, they
        are derived from 'sort_field'
    @param rg, jrec: record-group size and first record, bounding how many
        records are returned
    @return: sorted (sub)list of recIDs (in reverse display order)
    """
    _ = gettext_set_language(ln)
    #we should return sorted records up to irec_max(exclusive)
    dummy, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
    #calculate the min index on the reverted list
    index_min = max(len(recIDs) - irec_max, 0) #just to be sure that the min index is not negative
    ## check arguments:
    if not sort_field:
        return recIDs[index_min:]
    if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT:
        # refuse to sort overly large result sets; keep original order
        if of.startswith('h'):
            write_warning(_("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning", req=req)
        return recIDs[index_min:]
    recIDs_dict = {}
    recIDs_out = []
    if not tags:
        # tags have not been computed yet
        sort_fields = string.split(sort_field, ",")
        tags, error_field = get_tags_from_sort_fields(sort_fields)
        if error_field:
            if of.startswith('h'):
                write_warning(_("Sorry, %s does not seem to be a valid sort option. The records will not be sorted.") % cgi.escape(error_field), "Error", req=req)
            return recIDs[index_min:]
    if verbose >= 3 and of.startswith('h'):
        write_warning("Sorting by tags %s." % cgi.escape(repr(tags)), req=req)
        if sort_pattern:
            write_warning("Sorting preferentially by %s." % cgi.escape(sort_pattern), req=req)
    ## check if we have sorting tag defined:
    if tags:
        # fetch the necessary field values:
        for recID in recIDs:
            val = "" # will hold value for recID according to which sort
            vals = [] # will hold all values found in sorting tag for recID
            for tag in tags:
                if CFG_CERN_SITE and tag == '773__c':
                    # CERN hack: journal sorting
                    # 773__c contains page numbers, e.g. 3-13, and we want to sort by 3, and numerically:
                    vals.extend(["%050s" % x.split("-", 1)[0] for x in get_fieldvalues(recID, tag)])
                else:
                    vals.extend(get_fieldvalues(recID, tag))
            if sort_pattern:
                # try to pick that tag value that corresponds to sort pattern
                bingo = 0
                for v in vals:
                    if v.lower().startswith(sort_pattern.lower()): # bingo!
                        bingo = 1
                        val = v
                        break
                if not bingo: # sort_pattern not present, so add other vals after spaces
                    val = sort_pattern + " " + string.join(vals)
            else:
                # no sort pattern defined, so join them all together
                val = string.join(vals)
            val = strip_accents(val.lower()) # sort values regardless of accents and case
            if recIDs_dict.has_key(val):
                recIDs_dict[val].append(recID)
            else:
                recIDs_dict[val] = [recID]
        # sort them:
        recIDs_dict_keys = recIDs_dict.keys()
        recIDs_dict_keys.sort()
        # now that keys are sorted, create output array:
        for k in recIDs_dict_keys:
            for s in recIDs_dict[k]:
                recIDs_out.append(s)
        # ascending or descending?
        if sort_order == 'a':
            recIDs_out.reverse()
        # okay, we are done
        # return only up to the maximum that we need to sort
        if len(recIDs_out) != len(recIDs):
            dummy, irec_max = get_interval_for_records_to_sort(len(recIDs_out), jrec, rg)
            index_min = max(len(recIDs_out) - irec_max, 0) #just to be sure that the min index is not negative
        return recIDs_out[index_min:]
    else:
        # good, no sort needed
        return recIDs[index_min:]
def get_interval_for_records_to_sort(nb_found, jrec=None, rg=None):
    """Return the (irec_min, irec_max) interval of records to sort.

    irec_max is exclusive.  A value of 'rg=-9999' means to print all
    records: to be used with care.
    """
    jrec = jrec or 1
    if not rg:
        # no record-group size given: return everything from jrec onwards
        return jrec - 1, nb_found
    # -9999 is the "all records" sentinel; otherwise group sizes are positive:
    if rg == -9999:
        rg = nb_found
    else:
        rg = abs(rg)
    # sanity checks on the starting record:
    if jrec < 1:
        jrec = 1
    if jrec > nb_found:
        jrec = max(nb_found - rg + 1, 1)
    # records will be sorted from irec_min up to (but excluding) irec_max,
    # both clamped to the available range:
    irec_min = max(jrec - 1, 0)
    irec_max = min(jrec - 1 + rg, nb_found)
    return irec_min, irec_max
def print_records(req, recIDs, jrec=1, rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, format='hb', ot='', ln=CFG_SITE_LANG,
                  relevances=[], relevances_prologue="(", relevances_epilogue="%%)",
                  decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True,
                  print_records_epilogue_p=True, verbose=0, tab='', sf='', so='d', sp='',
                  rm='', em=''):
    """
    Prints list of records 'recIDs' formatted according to 'format' in
    groups of 'rg' starting from 'jrec'.

    Assumes that the input list 'recIDs' is sorted in reverse order,
    so it counts records from tail to head.

    A value of 'rg=-9999' means to print all records: to be used with care.

    Print also list of RELEVANCES for each record (if defined), in
    between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE.

    Print prologue and/or epilogue specific to 'format' if
    'print_records_prologue_p' and/or print_records_epilogue_p' are
    True.

    'sf' is sort field and 'rm' is ranking method that are passed here
    only for proper linking purposes: e.g. when a certain ranking
    method or a certain sort field was selected, keep it selected in
    any dynamic search links that may be printed.

    All output is written directly to 'req'; nothing is returned.
    """
    # 'em' restricts which page parts may be emitted; bail out when the
    # body part was not requested
    if em != "" and EM_REPOSITORY["body"] not in em:
        return
    # load the right message language
    _ = gettext_set_language(ln)
    # sanity checking:
    if req is None:
        return
    # get user_info (for formatting based on user)
    if isinstance(req, cStringIO.OutputType):
        user_info = {}
    else:
        user_info = collect_user_info(req)
    if len(recIDs):
        nb_found = len(recIDs)
        if rg == -9999: # print all records
            rg = nb_found
        else:
            rg = abs(rg)
        if jrec < 1: # sanity checks
            jrec = 1
        if jrec > nb_found:
            jrec = max(nb_found-rg+1, 1)
        # will print records from irec_max to irec_min excluded:
        irec_max = nb_found - jrec
        irec_min = nb_found - jrec - rg
        if irec_min < 0:
            irec_min = -1
        if irec_max >= nb_found:
            irec_max = nb_found - 1
        #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max))
        if format.startswith('x'):
            # XML family of formats
            # print header if needed
            if print_records_prologue_p:
                print_records_prologue(req, format)
            # print records
            recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)]
            if ot:
                # asked to print some filtered fields only, so call print_record() on the fly:
                for irec in range(irec_max, irec_min, -1):
                    x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                     user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)
                    req.write(x)
                    if x:
                        req.write('\n')
            else:
                format_records(recIDs_to_print,
                               format,
                               ln=ln,
                               search_pattern=search_pattern,
                               record_separator="\n",
                               user_info=user_info,
                               req=req)
            # print footer if needed
            if print_records_epilogue_p:
                print_records_epilogue(req, format)
        elif format.startswith('t') or str(format[0:3]).isdigit():
            # we are doing plain text output:
            for irec in range(irec_max, irec_min, -1):
                x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                 user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)
                req.write(x)
                if x:
                    req.write('\n')
        elif format == 'excel':
            recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)]
            create_excel(recIDs=recIDs_to_print, req=req, ln=ln, ot=ot, user_info=user_info)
        else:
            # we are doing HTML output:
            if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"):
                # portfolio and on-the-fly formats:
                for irec in range(irec_max, irec_min, -1):
                    req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                           user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm))
            elif format.startswith("hb"):
                # HTML brief format:
                # guests (at high account-restriction levels) and users
                # without basket rights do not get "add to basket" controls
                display_add_to_basket = True
                if user_info:
                    if user_info['email'] == 'guest':
                        if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4:
                            display_add_to_basket = False
                    else:
                        if not user_info['precached_usebaskets']:
                            display_add_to_basket = False
                if em != "" and EM_REPOSITORY["basket"] not in em:
                    display_add_to_basket = False
                req.write(websearch_templates.tmpl_record_format_htmlbrief_header(
                    ln = ln))
                for irec in range(irec_max, irec_min, -1):
                    row_number = jrec+irec_max-irec
                    recid = recIDs[irec]
                    if relevances and relevances[irec]:
                        relevance = relevances[irec]
                    else:
                        relevance = ''
                    record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern,
                                          user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)
                    req.write(websearch_templates.tmpl_record_format_htmlbrief_body(
                        ln = ln,
                        recid = recid,
                        row_number = row_number,
                        relevance = relevance,
                        record = record,
                        relevances_prologue = relevances_prologue,
                        relevances_epilogue = relevances_epilogue,
                        display_add_to_basket = display_add_to_basket
                        ))
                req.write(websearch_templates.tmpl_record_format_htmlbrief_footer(
                    ln = ln,
                    display_add_to_basket = display_add_to_basket))
            elif format.startswith("hd"):
                # HTML detailed format:
                for irec in range(irec_max, irec_min, -1):
                    if record_exists(recIDs[irec]) == -1:
                        # deleted record: warn, mention the merge target
                        # if any, and skip to the next record
                        write_warning(_("The record has been deleted."), req=req)
                        merged_recid = get_merged_recid(recIDs[irec])
                        if merged_recid:
                            write_warning(_("The record %d replaces it." % merged_recid), req=req)
                        continue
                    unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])),
                                                            recIDs[irec], ln=ln)
                    ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
                    ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
                    link_ln = ''
                    if ln != CFG_SITE_LANG:
                        link_ln = '?ln=%s' % ln
                    recid = recIDs[irec]
                    recid_to_display = recid  # Record ID used to build the URL.
                    if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                        try:
                            recid_to_display = get_fieldvalues(recid,
                                                               CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG)[0]
                        except IndexError:
                            # No external sysno is available, keep using
                            # internal recid.
                            pass
                    # (label, url, is_selected, is_enabled) for every visible tab
                    tabs = [(unordered_tabs[tab_id]['label'], \
                             '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid_to_display, tab_id, link_ln), \
                             tab_id == tab,
                             unordered_tabs[tab_id]['enabled']) \
                            for (tab_id, order) in ordered_tabs_id
                            if unordered_tabs[tab_id]['visible'] == True]
                    tabs_counts = get_detailed_page_tabs_counts(recid)
                    citedbynum = tabs_counts['Citations']
                    references = tabs_counts['References']
                    discussions = tabs_counts['Discussions']
                    # load content
                    if tab == 'usage':
                        req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                                                   tabs,
                                                                                   ln,
                                                                                   citationnum=citedbynum,
                                                                                   referencenum=references,
                                                                                   discussionnum=discussions))
                        r = calculate_reading_similarity_list(recIDs[irec], "downloads")
                        downloadsimilarity = None
                        downloadhistory = None
                        #if r:
                        #    downloadsimilarity = r
                        if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
                            downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln)
                        r = calculate_reading_similarity_list(recIDs[irec], "pageviews")
                        viewsimilarity = None
                        if r: viewsimilarity = r
                        content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec],
                                                                                      ln,
                                                                                      downloadsimilarity=downloadsimilarity,
                                                                                      downloadhistory=downloadhistory,
                                                                                      viewsimilarity=viewsimilarity)
                        req.write(content)
                        req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                      tabs,
                                                                                      ln))
                    elif tab == 'citations':
                        recid = recIDs[irec]
                        req.write(webstyle_templates.detailed_record_container_top(recid,
                                                                                   tabs,
                                                                                   ln,
                                                                                   citationnum=citedbynum,
                                                                                   referencenum=references,
                                                                                   discussionnum=discussions))
                        req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln))
                        # Citing
                        citinglist = calculate_cited_by_list(recid)
                        req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid,
                                                                                                 ln,
                                                                                                 citinglist,
                                                                                                 sf=sf,
                                                                                                 so=so,
                                                                                                 sp=sp,
                                                                                                 rm=rm))
                        # Self-cited
                        selfcited = get_self_cited_by(recid)
                        req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid,
                                  ln, selfcited=selfcited, citinglist=citinglist))
                        # Co-cited
                        s = calculate_co_cited_with_list(recid)
                        cociting = None
                        if s:
                            cociting = s
                        req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid,
                                                                                               ln,
                                                                                               cociting=cociting))
                        # Citation history, if needed
                        citationhistory = None
                        if citinglist:
                            citationhistory = create_citation_history_graph_and_box(recid, ln)
                        #debug
                        if verbose > 3:
                            write_warning("Citation graph debug: " + \
                                          str(len(citationhistory)), req=req)
                        req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory))
                        req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln))
                        req.write(webstyle_templates.detailed_record_container_bottom(recid,
                                                                                      tabs,
                                                                                      ln))
                    elif tab == 'references':
                        req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                                                   tabs,
                                                                                   ln,
                                                                                   citationnum=citedbynum,
                                                                                   referencenum=references,
                                                                                   discussionnum=discussions))
                        req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose))
                        req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                      tabs,
                                                                                      ln))
                    elif tab == 'keywords':
                        from invenio.bibclassify_webinterface import \
                            record_get_keywords, write_keywords_body, \
                            generate_keywords
                        from invenio.webinterface_handler import wash_urlargd
                        form = req.form
                        argd = wash_urlargd(form, {
                            'generate': (str, 'no'),
                            'sort': (str, 'occurrences'),
                            'type': (str, 'tagcloud'),
                            'numbering': (str, 'off'),
                            })
                        recid = recIDs[irec]
                        # NOTE(review): the container/plots block below looks
                        # duplicated from the 'plots' tab before the actual
                        # keywords container is opened -- confirm intent.
                        req.write(webstyle_templates.detailed_record_container_top(recid,
                                                                                   tabs,
                                                                                   ln))
                        content = websearch_templates.tmpl_record_plots(recID=recid,
                                                                        ln=ln)
                        req.write(content)
                        req.write(webstyle_templates.detailed_record_container_bottom(recid,
                                                                                      tabs,
                                                                                      ln))
                        req.write(webstyle_templates.detailed_record_container_top(recid,
                                  tabs, ln, citationnum=citedbynum, referencenum=references))
                        if argd['generate'] == 'yes':
                            # The user asked to generate the keywords.
                            keywords = generate_keywords(req, recid, argd)
                        else:
                            # Get the keywords contained in the MARC.
                            keywords = record_get_keywords(recid, argd)
                        if argd['sort'] == 'related' and not keywords:
                            req.write('You may want to run BibIndex.')
                        # Output the keywords or the generate button.
                        write_keywords_body(keywords, req, recid, argd)
                        req.write(webstyle_templates.detailed_record_container_bottom(recid,
                                  tabs, ln))
                    elif tab == 'plots':
                        req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                                                   tabs,
                                                                                   ln))
                        content = websearch_templates.tmpl_record_plots(recID=recIDs[irec],
                                                                        ln=ln)
                        req.write(content)
                        req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                      tabs,
                                                                                      ln))
                    else:
                        # Metadata tab
                        req.write(webstyle_templates.detailed_record_container_top(recIDs[irec],
                                                                                   tabs,
                                                                                   ln,
                                                                                   show_short_rec_p=False,
                                                                                   citationnum=citedbynum, referencenum=references,
                                                                                   discussionnum=discussions))
                        creationdate = None
                        modificationdate = None
                        if record_exists(recIDs[irec]) == 1:
                            creationdate = get_creation_date(recIDs[irec])
                            modificationdate = get_modification_date(recIDs[irec])
                        content = print_record(recIDs[irec], format, ot, ln,
                                               search_pattern=search_pattern,
                                               user_info=user_info, verbose=verbose,
                                               sf=sf, so=so, sp=sp, rm=rm)
                        content = websearch_templates.tmpl_detailed_record_metadata(
                            recID = recIDs[irec],
                            ln = ln,
                            format = format,
                            creationdate = creationdate,
                            modificationdate = modificationdate,
                            content = content)
                        # display of the next-hit/previous-hit/back-to-search links
                        # on the detailed record pages
                        content += websearch_templates.tmpl_display_back_to_search(req,
                                                                                   recIDs[irec],
                                                                                   ln)
                        req.write(content)
                        req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec],
                                                                                      tabs,
                                                                                      ln,
                                                                                      creationdate=creationdate,
                                                                                      modificationdate=modificationdate,
                                                                                      show_short_rec_p=False))
                        if len(tabs) > 0:
                            # Add the mini box at bottom of the page
                            if CFG_WEBCOMMENT_ALLOW_REVIEWS:
                                from invenio.modules.comments.api import get_mini_reviews
                                reviews = get_mini_reviews(recid = recIDs[irec], ln=ln)
                            else:
                                reviews = ''
                            actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose)
                            files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose)
                            req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec],
                                                                                    ln,
                                                                                    format,
                                                                                    files=files,
                                                                                    reviews=reviews,
                                                                                    actions=actions))
            else:
                # Other formats
                for irec in range(irec_max, irec_min, -1):
                    req.write(print_record(recIDs[irec], format, ot, ln,
                                           search_pattern=search_pattern,
                                           user_info=user_info, verbose=verbose,
                                           sf=sf, so=so, sp=sp, rm=rm))
    else:
        write_warning(_("Use different search terms."), req=req)
def print_records_prologue(req, format, cc=None):
    """
    Write to 'req' the opening markup that matches the output 'format'.

    HTML and plain-text formats need no prologue, so an empty string is
    written for them.  'cc' (current collection) is forwarded to the RSS
    and podcast prologues, which embed collection information.
    """
    # Ordered by prefix specificity: 'xe8x' must be tried before 'xe',
    # and the generic 'x' catch-all must come last.
    dispatch = (
        ('xm', lambda: websearch_templates.tmpl_xml_marc_prologue()),
        ('xn', lambda: websearch_templates.tmpl_xml_nlm_prologue()),
        ('xw', lambda: websearch_templates.tmpl_xml_refworks_prologue()),
        ('xr', lambda: websearch_templates.tmpl_xml_rss_prologue(cc=cc)),
        ('xe8x', lambda: websearch_templates.tmpl_xml_endnote_8x_prologue()),
        ('xe', lambda: websearch_templates.tmpl_xml_endnote_prologue()),
        ('xo', lambda: websearch_templates.tmpl_xml_mods_prologue()),
        ('xp', lambda: websearch_templates.tmpl_xml_podcast_prologue(cc=cc)),
        ('x', lambda: websearch_templates.tmpl_xml_default_prologue()),
    )
    prologue = ""  # no prologue needed for HTML or Text formats
    for prefix, make_prologue in dispatch:
        if format.startswith(prefix):
            prologue = make_prologue()
            break
    req.write(prologue)
def print_records_epilogue(req, format):
    """
    Write to 'req' the closing markup that matches the output 'format'.

    HTML and plain-text formats need no epilogue, so an empty string is
    written for them.
    """
    # Ordered by prefix specificity: 'xe8x' must be tried before 'xe',
    # and the generic 'x' catch-all must come last.
    dispatch = (
        ('xm', websearch_templates.tmpl_xml_marc_epilogue),
        ('xn', websearch_templates.tmpl_xml_nlm_epilogue),
        ('xw', websearch_templates.tmpl_xml_refworks_epilogue),
        ('xr', websearch_templates.tmpl_xml_rss_epilogue),
        ('xe8x', websearch_templates.tmpl_xml_endnote_8x_epilogue),
        ('xe', websearch_templates.tmpl_xml_endnote_epilogue),
        ('xo', websearch_templates.tmpl_xml_mods_epilogue),
        ('xp', websearch_templates.tmpl_xml_podcast_epilogue),
        ('x', websearch_templates.tmpl_xml_default_epilogue),
    )
    epilogue = ""  # no epilogue needed for HTML or Text formats
    for prefix, make_epilogue in dispatch:
        if format.startswith(prefix):
            epilogue = make_epilogue()
            break
    req.write(epilogue)
def get_record(recid):
    """Return the record structure corresponding to 'recid'.

    When CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE is enabled, first try
    the pre-serialized 'recstruct' cache stored in the bibfmt table and
    deserialize it; when the cache is missing or corrupted, fall back to
    rebuilding the structure from the record's MARCXML.
    """
    if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE:
        value = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT='recstruct'", (recid, ))
        if value:
            try:
                return deserialize_via_marshal(value[0][0])
            # Narrowed from a bare `except:` so that KeyboardInterrupt /
            # SystemExit are not swallowed here.
            except Exception:
                ### In case of corruption, let's rebuild it!
                pass
    return create_record(print_record(recid, 'xm'))[0]
def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress,
                 search_pattern=None, user_info=None, verbose=0, sf='', so='d',
                 sp='', rm='', brief_links=True):
    """
    Prints record 'recID' formatted according to 'format'.

    'sf' is sort field and 'rm' is ranking method that are passed here
    only for proper linking purposes: e.g. when a certain ranking
    method or a certain sort field was selected, keep it selected in
    any dynamic search links that may be printed.

    Returns the formatted record as a string (empty when the record
    does not exist); for format 'recstruct' the record structure
    itself is returned instead.
    """
    if format == 'recstruct':
        return get_record(recID)
    _ = gettext_set_language(ln)
    # can the current user claim this paper? (used by the brief links)
    display_claim_this_paper = False
    try:
        display_claim_this_paper = user_info["precached_viewclaimlink"]
    except (KeyError, TypeError):
        display_claim_this_paper = False
    #check from user information if the user has the right to see hidden fields/tags in the
    #records as well
    can_see_hidden = False
    if user_info:
        can_see_hidden = user_info.get('precached_canseehiddenmarctags', False)
    out = ""
    # sanity check:
    record_exist_p = record_exists(recID)
    if record_exist_p == 0: # doesn't exist
        return out
    # New Python BibFormat procedure for formatting
    # Old procedure follows further below
    # We must still check some special formats, but these
    # should disappear when BibFormat improves.
    if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \
            or format.lower().startswith('t') \
            or format.lower().startswith('hm') \
            or str(format[0:3]).isdigit() \
            or ot):
        # Unspecified format is hd
        if format == '':
            format = 'hd'
        if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html':
            # HTML output displays a default value for deleted records.
            # Other format have to deal with it.
            out += _("The record has been deleted.")
            # was record deleted-but-merged ?
            merged_recid = get_merged_recid(recID)
            if merged_recid:
                out += ' ' + _("The record %d replaces it." % merged_recid)
        else:
            out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose)
            # at the end of HTML brief mode, print the "Detailed record" functionality:
            if brief_links and format.lower().startswith('hb') and \
                    format.lower() != 'hb_p':
                out += websearch_templates.tmpl_print_record_brief_links(ln=ln,
                                                                         recID=recID,
                                                                         sf=sf,
                                                                         so=so,
                                                                         sp=sp,
                                                                         rm=rm,
                                                                         display_claim_link=display_claim_this_paper)
        return out
    # Old PHP BibFormat procedure for formatting
    # print record opening tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += " <record>\n"
        out += " <header>\n"
        for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
            out += " <identifier>%s</identifier>\n" % oai_id
        out += " <datestamp>%s</datestamp>\n" % get_modification_date(recID)
        out += " </header>\n"
        out += " <metadata>\n"
    if format.startswith("xm") or format == "marcxml":
        # look for detailed format existence:
        query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
        res = run_sql(query, (recID, format), 1)
        if res and record_exist_p == 1 and not ot:
            # record 'recID' is formatted in 'format', and we are not
            # asking for field-filtered output; so print it:
            out += "%s" % decompress(res[0][0])
        elif ot:
            # field-filtered output was asked for; print only some fields
            if not can_see_hidden:
                ot = list(set(ot) - set(CFG_BIBFORMAT_HIDDEN_TAGS))
            out += record_xml_output(get_record(recID), ot)
        else:
            # record 'recID' is not formatted in 'format' or we ask
            # for field-filtered output -- they are not in "bibfmt"
            # table; so fetch all the data from "bibXXx" tables:
            if format == "marcxml":
                out += """ <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
                out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            elif format.startswith("xm"):
                out += """ <record>\n"""
                out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID)
            if record_exist_p == -1:
                # deleted record, so display only OAI ID and 980:
                oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                if oai_ids:
                    out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                           (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0])
                out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n"
            else:
                # controlfields
                query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                        "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\
                        "ORDER BY bb.field_number, b.tag ASC"
                res = run_sql(query, (recID, ))
                for row in res:
                    field, value = row[0], row[1]
                    value = encode_for_xml(value)
                    out += """ <controlfield tag="%s">%s</controlfield>\n""" % \
                           (encode_for_xml(field[0:3]), value)
                # datafields
                i = 1 # Do not process bib00x and bibrec_bib00x, as
                      # they are controlfields. So start at bib01x and
                      # bibrec_bib00x (and set i = 0 at the end of
                      # first loop)
                for digit1 in range(0, 10):
                    for digit2 in range(i, 10):
                        bx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\
                                "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx)
                        res = run_sql(query, (recID, str(digit1)+str(digit2)+'%'))
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            # MARC uses '_' (or empty) for "no indicator"
                            if ind1 == "_" or ind1 == "":
                                ind1 = " "
                            if ind2 == "_" or ind2 == "":
                                ind2 = " "
                            # print field tag, unless hidden
                            printme = True
                            if not can_see_hidden:
                                for htag in CFG_BIBFORMAT_HIDDEN_TAGS:
                                    ltag = len(htag)
                                    samelenfield = field[0:ltag]
                                    if samelenfield == htag:
                                        printme = False
                            if printme:
                                # open a new <datafield> whenever the field
                                # number or the tag+indicators change
                                if field_number != field_number_old or field[:-1] != field_old[:-1]:
                                    if field_number_old != -999:
                                        out += """ </datafield>\n"""
                                    out += """ <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                           (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2))
                                    field_number_old = field_number
                                    field_old = field
                                # print subfield value
                                value = encode_for_xml(value)
                                out += """ <subfield code="%s">%s</subfield>\n""" % \
                                       (encode_for_xml(field[-1:]), value)
                        # all fields/subfields printed in this run, so close the tag:
                        if field_number_old != -999:
                            out += """ </datafield>\n"""
                    i = 0 # Next loop should start looking at bib%0 and bibrec_bib00x
            # we are at the end of printing the record:
            out += " </record>\n"
    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """ <dc xmlns="http://purl.org/dc/elements/1.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://purl.org/dc/elements/1.1/
http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        if record_exist_p == -1:
            out += ""
        else:
            for f in get_fieldvalues(recID, "041__a"):
                out += " <language>%s</language>\n" % f
            for f in get_fieldvalues(recID, "100__a"):
                out += " <creator>%s</creator>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "700__a"):
                out += " <creator>%s</creator>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "245__a"):
                out += " <title>%s</title>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "65017a"):
                out += " <subject>%s</subject>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "8564_u"):
                # NOTE(review): f.split('.') is a list and can never equal
                # the string 'png', so this filter never triggers; likely
                # f.split('.')[-1] == 'png' was intended -- confirm.
                if f.split('.') == 'png':
                    continue
                out += " <identifier>%s</identifier>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "520__a"):
                out += " <description>%s</description>\n" % encode_for_xml(f)
            out += " <date>%s</date>\n" % get_creation_date(recID)
        out += " </dc>\n"
    elif len(format) == 6 and str(format[0:3]).isdigit():
        # user has asked to print some fields only
        if format == "001":
            out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format)
        else:
            vals = get_fieldvalues(recID, format)
            for val in vals:
                out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format)
    elif format.startswith('t'):
        ## user directly asked for some tags to be displayed only
        if record_exist_p == -1:
            out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)
        else:
            out += get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)
    elif format == "hm":
        # HTML MARC view: ALEPH-sequential dump wrapped in <pre>, escaped
        if record_exist_p == -1:
            out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)) + "</pre>"
        else:
            out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)) + "</pre>"
    elif format.startswith("h") and ot:
        ## user directly asked for some tags to be displayed only
        if record_exist_p == -1:
            out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) + "</pre>"
        else:
            out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) + "</pre>"
    elif format == "hd":
        # HTML detailed format
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            # look for detailed format existence:
            query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
            res = run_sql(query, (recID, format), 1)
            if res:
                # record 'recID' is formatted in 'format', so print it
                out += "%s" % decompress(res[0][0])
            else:
                # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format:
                out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                      user_info=user_info, verbose=verbose)
                if out_record_in_format:
                    out += out_record_in_format
                else:
                    out += websearch_templates.tmpl_print_record_detailed(
                        ln = ln,
                        recID = recID,
                        )
    elif format.startswith("hb_") or format.startswith("hd_"):
        # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose)
    elif format.startswith("hx"):
        # BibTeX format, called on the fly:
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose)
    elif format.startswith("hs"):
        # for citation/download similarity navigation links:
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            out += '<a href="%s">' % websearch_templates.build_search_url(recid=recID, ln=ln)
            # firstly, title:
            titles = get_fieldvalues(recID, "245__a")
            if titles:
                for title in titles:
                    out += "<strong>%s</strong>" % title
            else:
                # usual title not found, try conference title:
                titles = get_fieldvalues(recID, "111__a")
                if titles:
                    for title in titles:
                        out += "<strong>%s</strong>" % title
                else:
                    # just print record ID:
                    out += "<strong>%s %d</strong>" % (get_field_i18nname("record ID", ln, False), recID)
            out += "</a>"
            # secondly, authors:
            authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
            if authors:
                out += " - %s" % authors[0]
                if len(authors) > 1:
                    out += " <em>et al</em>"
            # thirdly publication info:
            publinfos = get_fieldvalues(recID, "773__s")
            if not publinfos:
                publinfos = get_fieldvalues(recID, "909C4s")
                if not publinfos:
                    publinfos = get_fieldvalues(recID, "037__a")
                    if not publinfos:
                        publinfos = get_fieldvalues(recID, "088__a")
            if publinfos:
                out += " - %s" % publinfos[0]
            else:
                # fourthly publication year (if not publication info):
                years = get_fieldvalues(recID, "773__y")
                if not years:
                    years = get_fieldvalues(recID, "909C4y")
                    if not years:
                        years = get_fieldvalues(recID, "260__c")
                if years:
                    out += " (%s)" % years[0]
    else:
        # HTML brief format by default
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
            res = run_sql(query, (recID, format))
            if res:
                # record 'recID' is formatted in 'format', so print it
                out += "%s" % decompress(res[0][0])
            else:
                # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly: or use default format:
                if CFG_WEBSEARCH_CALL_BIBFORMAT:
                    out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                          user_info=user_info, verbose=verbose)
                    if out_record_in_format:
                        out += out_record_in_format
                    else:
                        out += websearch_templates.tmpl_print_record_brief(
                            ln = ln,
                            recID = recID,
                            )
                else:
                    out += websearch_templates.tmpl_print_record_brief(
                        ln = ln,
                        recID = recID,
                        )
        # at the end of HTML brief mode, print the "Detailed record" functionality:
        if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"):
            pass # do nothing for portfolio and on-the-fly formats
        else:
            out += websearch_templates.tmpl_print_record_brief_links(ln=ln,
                                                                     recID=recID,
                                                                     sf=sf,
                                                                     so=so,
                                                                     sp=sp,
                                                                     rm=rm,
                                                                     display_claim_link=display_claim_this_paper)
    # print record closing tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += " </metadata>\n"
        out += " </record>\n"
    return out
def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0):
    """
    Calls BibFormat and returns formatted record.

    BibFormat will decide by itself if old or new BibFormat must be used.

    When 'search_pattern' is given it is split into basic search units;
    the non-negated units restricted to no field or to 'fulltext' are
    passed to BibFormat as keywords.  When snippet display is enabled
    and the user's URI mentions 'fulltext', matching PDF snippets are
    appended to the output (best-effort: snippet failures are logged
    via register_exception and ignored).
    """
    from invenio.modules.formatter.utils import get_pdf_snippets
    keywords = []
    if search_pattern is not None:
        for unit in create_basic_search_units(None, str(search_pattern), None):
            # unit = (operator, pattern, field, match-type)
            bsu_o, bsu_p, bsu_f, bsu_m = unit[0], unit[1], unit[2], unit[3]
            if (bsu_o != '-' and bsu_f in [None, 'fulltext']):
                if bsu_m == 'a' and bsu_p.startswith('%') and bsu_p.endswith('%'):
                    # remove leading and trailing `%' representing partial phrase search
                    keywords.append(bsu_p[1:-1])
                else:
                    keywords.append(bsu_p)
    out = format_record(recID,
                        of=format,
                        ln=ln,
                        search_pattern=keywords,
                        user_info=user_info,
                        verbose=verbose)
    if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and user_info and \
            'fulltext' in user_info['uri'].lower():
        # check snippets only if URL contains fulltext
        # FIXME: make it work for CLI too, via new function arg
        if keywords:
            snippets = ''
            try:
                snippets = get_pdf_snippets(recID, keywords, user_info)
            except:
                register_exception()
            if snippets:
                out += snippets
    return out
def log_query(hostname, query_args, uid=-1):
    """
    Log query into the query and user_query tables.

    Queries are deduplicated on their URL arguments: a new row is
    inserted into 'query' only when 'query_args' was never seen before.
    Nothing is logged for negative (unreasonable) uids.

    Return id_query or None in case of problems.
    """
    id_query = None
    if uid >= 0:
        # log the query only if uid is reasonable
        res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1)
        try:
            id_query = res[0][0]
        # Narrowed from a bare `except:`: only a missing row (empty or
        # None result) should trigger the insertion of a new query row.
        except (IndexError, TypeError):
            id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,))
        if id_query:
            run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)",
                    (uid, id_query, hostname,
                     time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    return id_query
def log_query_info(action, p, f, colls, nb_records_found_total=-1):
    """Write some info to the search log file for later analysis.

    Appends one '#'-separated line per call:
    timestamp#action#pattern#field#coll1,coll2#nb_found
    Logging is best-effort: any problem is silently ignored so that
    searching never fails because of the log.
    """
    try:
        # `with` guarantees the file is closed even if a write fails
        # (the original leaked the handle on error).
        with open(CFG_LOGDIR + "/search.log", "a") as log:
            log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime()))
            log.write(action+"#")
            log.write(p+"#")
            log.write(f+"#")
            for coll in colls[:-1]:
                log.write("%s," % coll)
            if colls:
                log.write("%s#" % colls[-1])
            else:
                # empty collection list used to raise IndexError here,
                # aborting mid-line without a newline and corrupting the
                # log; emit an empty collection field instead.
                log.write("#")
            log.write("%d" % nb_records_found_total)
            log.write("\n")
    except Exception:
        # deliberate best-effort: never let logging break the search
        pass
    return
def clean_dictionary(dictionary, list_of_items):
    """Return a copy of 'dictionary' in which every key listed in
    'list_of_items' maps to an empty string (keys not already present
    are added).  The input dictionary is left untouched."""
    cleaned = dict(dictionary)
    for item in list_of_items:
        cleaned[item] = ''
    return cleaned
### CALLABLES
def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, sf="", so="d", sp="", rm="", of="id", ot="", aas=0,
                           p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0,
                           recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="",
                           d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab="",
                           wl=0, em=""):
    """Perform search or browse request, without checking for
       authentication.  Return list of recIDs found, if of=id.
       Otherwise create web page.

       The arguments are as follows:

         req - mod_python Request class instance.

          cc - current collection (e.g. "ATLAS").  The collection the
               user started to search/browse from.

           c - collection list (e.g. ["Theses", "Books"]).  The
               collections user may have selected/deselected when
               starting to search from 'cc'.

           p - pattern to search for (e.g. "ellis and muon or kaon").

           f - field to search within (e.g. "author").

          rg - records in groups of (e.g. "10").  Defines how many hits
               per collection in the search results page are
               displayed.  (Note that `rg' is ignored in case of `of=id'.)

          sf - sort field (e.g. "title").

          so - sort order ("a"=ascending, "d"=descending).

          sp - sort pattern (e.g. "CERN-") -- in case there are more
               values in a sort field, this argument tells which one
               to prefer

          rm - ranking method (e.g. "jif").  Defines whether results
               should be ranked by some known ranking method.

          of - output format (e.g. "hb").  Usually starting "h" means
               HTML output (and "hb" for HTML brief, "hd" for HTML
               detailed), "x" means XML output, "t" means plain text
               output, "id" means no output at all but to return list
               of recIDs found, "intbitset" means to return an intbitset
               representation of the recIDs found (no sorting or ranking
               will be performed).  (Suitable for high-level API.)

          ot - output only these MARC tags (e.g. "100,700,909C0b").
               Useful if only some fields are to be shown in the
               output, e.g. for library to control some fields.

          em - output only part of the page.

         aas - advanced search ("0" means no, "1" means yes).  Whether
               search was called from within the advanced search
               interface.

          p1 - first pattern to search for in the advanced search
               interface.  Much like 'p'.

          f1 - first field to search within in the advanced search
               interface.  Much like 'f'.

          m1 - first matching type in the advanced search interface.
               ("a" all of the words, "o" any of the words, "e" exact
               phrase, "p" partial phrase, "r" regular expression).

         op1 - first operator, to join the first and the second unit
               in the advanced search interface.  ("a" add, "o" or,
               "n" not).

          p2 - second pattern to search for in the advanced search
               interface.  Much like 'p'.

          f2 - second field to search within in the advanced search
               interface.  Much like 'f'.

          m2 - second matching type in the advanced search interface.
               ("a" all of the words, "o" any of the words, "e" exact
               phrase, "p" partial phrase, "r" regular expression).

         op2 - second operator, to join the second and the third unit
               in the advanced search interface.  ("a" add, "o" or,
               "n" not).

          p3 - third pattern to search for in the advanced search
               interface.  Much like 'p'.

          f3 - third field to search within in the advanced search
               interface.  Much like 'f'.

          m3 - third matching type in the advanced search interface.
               ("a" all of the words, "o" any of the words, "e" exact
               phrase, "p" partial phrase, "r" regular expression).

          sc - split by collection ("0" no, "1" yes).  Governs whether
               we want to present the results in a single huge list,
               or split by collection.

        jrec - jump to record (e.g. "234").  Used for navigation
               inside the search results.  (Note that `jrec' is ignored
               in case of `of=id'.)

       recid - display record ID (e.g. "20000").  Do not
               search/browse but go straight away to the Detailed
               record page for the given recID.

      recidb - display record ID bis (e.g. "20010").  If greater than
               'recid', then display records from recid to recidb.
               Useful for example for dumping records from the
               database for reformatting.

       sysno - display old system SYS number (e.g. "").  If you
               migrate to Invenio from another system, and store your
               old SYS call numbers, you can use them instead of recid
               if you wish so.

          id - the same as recid, in case recid is not set.  For
               backwards compatibility.

         idb - the same as recid, in case recidb is not set.  For
               backwards compatibility.

       sysnb - the same as sysno, in case sysno is not set.  For
               backwards compatibility.

      action - action to do.  "SEARCH" for searching, "Browse" for
               browsing.  Default is to search.

          d1 - first datetime in full YYYY-mm-dd HH:MM:DD format
               (e.g. "1998-08-23 12:34:56"). Useful for search limits
               on creation/modification date (see 'dt' argument
               below).  Note that 'd1' takes precedence over d1y, d1m,
               d1d if these are defined.

         d1y - first date's year (e.g. "1998").  Useful for search
               limits on creation/modification date.

         d1m - first date's month (e.g. "08").  Useful for search
               limits on creation/modification date.

         d1d - first date's day (e.g. "23").  Useful for search
               limits on creation/modification date.

          d2 - second datetime in full YYYY-mm-dd HH:MM:DD format
               (e.g. "1998-09-02 12:34:56"). Useful for search limits
               on creation/modification date (see 'dt' argument
               below).  Note that 'd2' takes precedence over d2y, d2m,
               d2d if these are defined.

         d2y - second date's year (e.g. "1998").  Useful for search
               limits on creation/modification date.

         d2m - second date's month (e.g. "09").  Useful for search
               limits on creation/modification date.

         d2d - second date's day (e.g. "02").  Useful for search
               limits on creation/modification date.

          dt - first and second date's type (e.g. "c").  Specifies
               whether to search in creation dates ("c") or in
               modification dates ("m").  When dt is not set and d1*
               and d2* are set, the default is "c".

     verbose - verbose level (0=min, 9=max).  Useful to print some
               internal information on the searching process in case
               something goes wrong.

          ap - alternative patterns (0=no, 1=yes).  In case no exact
               match is found, the search engine can try alternative
               patterns e.g. to replace non-alphanumeric characters by
               a boolean query.  ap defines if this is wanted.

          ln - language of the search interface (e.g. "en").  Useful
               for internationalization.

          ec - list of external search engines to search as well
               (e.g. "SPIRES HEP").

          wl - wildcard limit (e.g. 100): wildcard queries will be
               limited to 100 results
    """
    # wash all arguments into a kwargs dict, then run the internal
    # search pipeline on it
    kwargs = prs_wash_arguments(req=req, cc=cc, c=c, p=p, f=f, rg=rg, sf=sf, so=so, sp=sp, rm=rm, of=of, ot=ot, aas=aas,
                                p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2, m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, jrec=jrec,
                                recid=recid, recidb=recidb, sysno=sysno, id=id, idb=idb, sysnb=sysnb, action=action, d1=d1,
                                d1y=d1y, d1m=d1m, d1d=d1d, d2=d2, d2y=d2y, d2m=d2m, d2d=d2d, dt=dt, verbose=verbose, ap=ap, ln=ln, ec=ec,
                                tab=tab, wl=wl, em=em)
    return prs_perform_search(kwargs=kwargs, **kwargs)
def prs_perform_search(kwargs=None, **dummy):
    """Run the actual search using pre-washed PRS arguments.

    First validates/washes the collection arguments (storing the result
    into ``kwargs``) and only then performs the search proper; the two
    steps are kept separate so each can be invoked independently.
    Unless you know what you are doing, do not use this call as an API.
    """
    colls_ok = prs_wash_arguments_colls(kwargs=kwargs, **kwargs)
    if colls_ok:
        return prs_search(kwargs=kwargs, **kwargs)
    # propagate the failure value ('', [] or empty intbitset) unchanged
    return colls_ok
def prs_wash_arguments_colls(kwargs=None, of=None, req=None, cc=None, c=None, sc=None, verbose=None,
                             aas=None, ln=None, em="", **dummy):
    """
    Check and wash collection list argument before we start searching.
    If there are troubles, e.g. a collection is not defined, print
    warning to the browser.

    The washed values are stored back into ``kwargs`` under
    'colls_to_display', 'colls_to_search', 'hosted_colls' and
    'wash_colls_debug'.

    @return: True if collection list is OK, and various False values
        (empty string, empty list, empty intbitset — chosen to match the
        requested output format ``of``) if there was an error.
    """
    # raise an exception when trying to print out html from the cli
    if of.startswith("h"):
        assert req
    # for every search engine request asking for an HTML output, we
    # first regenerate cache of collection and field I18N names if
    # needed; so that later we won't bother checking timestamps for
    # I18N names at all:
    if of.startswith("h"):
        collection_i18nname_cache.recreate_cache_if_needed()
        field_i18nname_cache.recreate_cache_if_needed()
    try:
        (cc, colls_to_display, colls_to_search, hosted_colls, wash_colls_debug) = wash_colls(cc, c, sc, verbose) # which colls to search and to display?
        kwargs['colls_to_display'] = colls_to_display
        kwargs['colls_to_search'] = colls_to_search
        kwargs['hosted_colls'] = hosted_colls
        kwargs['wash_colls_debug'] = wash_colls_debug
    except InvenioWebSearchUnknownCollectionError, exc:
        # an unknown collection was requested: report it in the format
        # the caller asked for, then signal failure via the return value
        colname = exc.colname
        if of.startswith("h"):
            page_start(req, of, cc, aas, ln, getUid(req),
                       websearch_templates.tmpl_collection_not_found_page_title(colname, ln))
            req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln))
            page_end(req, of, ln, em)
            return ''
        elif of == "id":
            return []
        elif of == "intbitset":
            return intbitset()
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
            page_end(req, of, ln, em)
            return ''
        else:
            page_end(req, of, ln, em)
            return ''
    return True
def prs_wash_arguments(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS,
                       sf="", so="d", sp="", rm="", of="id", ot="", aas=0,
                       p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="",
                       sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="",
                       d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG,
                       ec=None, tab="", uid=None, wl=0, em="", **dummy):
    """
    Sets the (default) values and checks others for the PRS call.

    Washes all user-supplied arguments (output format, patterns, fields,
    dates, ranking method), resolves the backwards-compatibility aliases
    (id/idb/sysnb -> recid/recidb/sysno), deduces the record ID from an
    ALEPH sysno, the collection from the record, and the user id from
    the request session.

    @return: dict of washed arguments for the other prs_* helpers,
        including derived entries 'datetext1'/'datetext2', 'pl'/
        'pl_in_url', 'uid' and the gettext function '_'.
    """
    # wash output format:
    of = wash_output_format(of)
    # wash all arguments requiring special care
    p = wash_pattern(p)
    f = wash_field(f)
    p1 = wash_pattern(p1)
    f1 = wash_field(f1)
    p2 = wash_pattern(p2)
    f2 = wash_field(f2)
    p3 = wash_pattern(p3)
    f3 = wash_field(f3)
    (d1y, d1m, d1d, d2y, d2m, d2d) = map(int, (d1y, d1m, d1d, d2y, d2m, d2d))
    datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d)
    # wash ranking method:
    if not is_method_valid(None, rm):
        rm = ""
    # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable)
    if sysnb != "" and sysno == "":
        sysno = sysnb
    if id > 0 and recid == -1:
        recid = id
    if idb > 0 and recidb == -1:
        recidb = idb
    # TODO deduce passed search limiting criterias (if applicable)
    pl, pl_in_url = "", "" # no limits by default
    if action != "browse" and req and not isinstance(req, cStringIO.OutputType) \
           and req.args and not isinstance(req.args, dict): # we do not want to add options while browsing or while calling via command-line
        fieldargs = cgi.parse_qs(req.args)
        for fieldcode in get_fieldcodes():
            # 'in' instead of the deprecated dict.has_key()
            if fieldcode in fieldargs:
                for val in fieldargs[fieldcode]:
                    pl += "+%s:\"%s\" " % (fieldcode, val)
                    pl_in_url += "&amp;%s=%s" % (urllib.quote(fieldcode), urllib.quote(val))
    # deduce recid from sysno argument (if applicable):
    if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record:
        recid = get_mysql_recid_from_aleph_sysno(sysno)
        if recid is None:
            recid = 0 # use recid 0 to indicate that this sysno does not exist
    # deduce collection we are in (if applicable):
    if recid > 0:
        referer = None
        if req:
            referer = req.headers_in.get('Referer')
        cc = guess_collection_of_a_record(recid, referer)
    # deduce user id (if applicable):
    if uid is None:
        try:
            uid = getUid(req)
        except:
            # no session available (e.g. CLI call): fall back to guest
            uid = 0
    _ = gettext_set_language(ln)
    # NOTE: the original dict literal contained a duplicate 'cc' key;
    # the redundant second occurrence has been dropped (same value).
    kwargs = {'req':req,'cc':cc, 'c':c, 'p':p, 'f':f, 'rg':rg, 'sf':sf, 'so':so, 'sp':sp, 'rm':rm, 'of':of, 'ot':ot, 'aas':aas,
              'p1':p1, 'f1':f1, 'm1':m1, 'op1':op1, 'p2':p2, 'f2':f2, 'm2':m2, 'op2':op2, 'p3':p3, 'f3':f3, 'm3':m3, 'sc':sc, 'jrec':jrec,
              'recid':recid, 'recidb':recidb, 'sysno':sysno, 'id':id, 'idb':idb, 'sysnb':sysnb, 'action':action, 'd1':d1,
              'd1y':d1y, 'd1m':d1m, 'd1d':d1d, 'd2':d2, 'd2y':d2y, 'd2m':d2m, 'd2d':d2d, 'dt':dt, 'verbose':verbose, 'ap':ap, 'ln':ln, 'ec':ec,
              'tab':tab, 'wl':wl, 'em': em,
              'datetext1': datetext1, 'datetext2': datetext2, 'uid': uid, 'pl': pl, 'pl_in_url': pl_in_url, '_': _,
              'selected_external_collections_infos':None,
             }
    kwargs.update(**dummy)
    return kwargs
def prs_search(kwargs=None, recid=0, req=None, cc=None, p=None, p1=None, p2=None, p3=None,
               f=None, ec=None, verbose=None, ln=None, selected_external_collections_infos=None,
               action=None, rm=None, of=None, em=None,
               **dummy):
    """
    Dispatch the washed PRS arguments to the appropriate search routine.

    This function writes various bits into the req object as the search
    proceeds (so that pieces of a page are rendered even before the
    search ended).

    Dispatch order (first match wins): detailed record (recid >= 0),
    browse, similarity search (rm set and p = "recid:..."),
    co-cited-with search (p = "cocitedwith:..."), common search.
    A non-None return from the chosen routine is propagated as-is.
    """
    ## 0 - start output
    if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist
        output = prs_detailed_record(kwargs=kwargs, **kwargs)
        if output is not None:
            return output

    elif action == "browse":
        ## 2 - browse needed
        # NOTE: this rebinds only the local 'of' (used for the external
        # searches / page_end below); kwargs['of'] passed to prs_browse
        # still holds the caller-supplied output format
        of = 'hb'
        output = prs_browse(kwargs=kwargs, **kwargs)
        if output is not None:
            return output

    elif rm and p.startswith("recid:"):
        ## 3-ter - similarity search (or old-style citation search) needed
        output = prs_search_similar_records(kwargs=kwargs, **kwargs)
        if output is not None:
            return output

    elif p.startswith("cocitedwith:"):  #WAS EXPERIMENTAL
        ## 3-terter - cited by search needed
        output = prs_search_cocitedwith(kwargs=kwargs, **kwargs)
        if output is not None:
            return output

    else:
        ## 3 - common search needed
        output = prs_search_common(kwargs=kwargs, **kwargs)
        if output is not None:
            return output

    # External searches
    if of.startswith("h"):
        if not of in ['hcs', 'hcs2']:
            perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                       ln, selected_external_collections_infos, em=em)

    return page_end(req, of, ln, em)
def prs_detailed_record(kwargs=None, req=None, of=None, cc=None, aas=None, ln=None, uid=None, recid=None, recidb=None,
                        p=None, verbose=None, tab=None, sf=None, so=None, sp=None, rm=None, ot=None, _=None, em=None,
                        **dummy):
    """Formats and prints one record (or the recid..recidb range).

    For "id"/"intbitset" output formats, the existing record IDs in the
    range are returned instead of printed.  HTML page views are
    registered as page-view events; a HEAD request for a non-existing
    record raises HTTP 404.
    """
    ## 1 - detailed record display
    title, description, keywords = \
           websearch_templates.tmpl_record_page_header_content(req, recid, ln)

    if req is not None and req.method != 'HEAD':
        page_start(req, of, cc, aas, ln, uid, title, description, keywords, recid, tab, em)
        # Default format is hb but we are in detailed -> change 'of'
        if of == "hb":
            of = "hd"
    if record_exists(recid):
        if recidb <= recid: # sanity check
            recidb = recid + 1
        if of in ["id", "intbitset"]:
            # return only those IDs in the range that actually exist
            result = [recidx for recidx in range(recid, recidb) if record_exists(recidx)]
            if of == "intbitset":
                return intbitset(result)
            else:
                return result
        else:
            print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose,
                          tab=tab, sf=sf, so=so, sp=sp, rm=rm, em=em)
        if req and of.startswith("h"): # register detailed record page view event
            client_ip_address = str(req.remote_ip)
            register_page_view_event(recid, uid, client_ip_address)
    else: # record does not exist
        if of == "id":
            return []
        elif of == "intbitset":
            return intbitset()
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
        elif of.startswith("h"):
            if req.method == 'HEAD':
                raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
            else:
                write_warning(_("Requested record does not seem to exist."), req=req)
def prs_browse(kwargs=None, req=None, of=None, cc=None, aas=None, ln=None, uid=None, _=None, p=None,
               p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
               so=None, sp=None, rm=None, ot=None, f1=None, m1=None, op1=None,
               f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
               d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
               dt=None, jrec=None, ec=None, action=None,
               colls_to_search=None, verbose=None, em=None, **dummy):
    """Render the "browse" page for the given search pattern(s).

    Writes the page header, search box, browse help link and the browse
    results into ``req``, then finishes the page.  Returns the value of
    ``page_end``.
    """
    page_start(req, of, cc, aas, ln, uid, _("Browse"), p=create_page_title_search_pattern_info(p, p1, p2, p3), em=em)
    req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                em
                                ))
    write_warning(create_exact_author_browse_help_link(p, p1, p2, p3, f, f1, f2, f3,
                                                       rm, cc, ln, jrec, rg, aas, action),
                  req=req)
    try:
        # advanced search: browse each supplied pattern separately;
        # simple search: browse the single pattern p
        if aas == 1 or (p1 or p2 or p3):
            browse_pattern(req, colls_to_search, p1, f1, rg, ln)
            browse_pattern(req, colls_to_search, p2, f2, rg, ln)
            browse_pattern(req, colls_to_search, p3, f3, rg, ln)
        else:
            browse_pattern(req, colls_to_search, p, f, rg, ln)
    except:
        # broad except kept deliberately: any browse failure is reported
        # to the admin and rendered as an error box / empty XML
        register_exception(req=req, alert_admin=True)
        if of.startswith("h"):
            req.write(create_error_box(req, verbose=verbose, ln=ln))
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
    return page_end(req, of, ln, em)
def prs_search_similar_records(kwargs=None, req=None, of=None, cc=None, pl_in_url=None, ln=None, uid=None, _=None, p=None,
                               p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
                               so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                               f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
                               d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
                               dt=None, jrec=None, ec=None, action=None, em=None,
                               verbose=None, **dummy):
    """Search for records similar to the one named in the pattern.

    The pattern is expected to be "recid:<ID>" — the record ID is taken
    from p[6:].  The similarity ranking is delegated to
    ``rank_records_bibrank`` using the ranking method ``rm``; results
    are written to ``req`` for HTML/XML outputs, or returned for
    "id"/"intbitset" outputs.
    """
    if req and req.method != 'HEAD':
        page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3),
                   em=em)
    if of.startswith("h"):
        req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                    p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                    em
                                    ))
    if record_exists(p[6:]) != 1:
        # record does not exist
        if of.startswith("h"):
            if req.method == 'HEAD':
                raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
            else:
                write_warning(_("Requested record does not seem to exist."), req=req)
        if of == "id":
            return []
        if of == "intbitset":
            return intbitset()
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
    else:
        # record well exists, so find similar ones to it
        t1 = os.times()[4]
        results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \
             rank_records_bibrank(rm, 0, get_collection_reclist(cc), string.split(p), verbose, f, rg, jrec)
        if results_similar_recIDs:
            t2 = os.times()[4]
            cpu_time = t2 - t1
            if of.startswith("h"):
                req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs),
                                            jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                            sc, pl_in_url,
                                            d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
                write_warning(results_similar_comments, req=req)
                print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln,
                              results_similar_relevances, results_similar_relevances_prologue,
                              results_similar_relevances_epilogue,
                              search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm, em=em)
            elif of == "id":
                return results_similar_recIDs
            elif of == "intbitset":
                return intbitset(results_similar_recIDs)
            elif of.startswith("x"):
                print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln,
                              results_similar_relevances, results_similar_relevances_prologue,
                              results_similar_relevances_epilogue, search_pattern=p, verbose=verbose,
                              sf=sf, so=so, sp=sp, rm=rm, em=em)
        else:
            # rank_records failed and returned some error message to display:
            if of.startswith("h"):
                write_warning(results_similar_relevances_prologue, req=req)
                write_warning(results_similar_relevances_epilogue, req=req)
                write_warning(results_similar_comments, req=req)
            if of == "id":
                return []
            elif of == "intbitset":
                return intbitset()
            elif of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
def prs_search_cocitedwith(kwargs=None, req=None, of=None, cc=None, pl_in_url=None, ln=None, uid=None, _=None, p=None,
                           p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
                           so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                           f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
                           d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
                           dt=None, jrec=None, ec=None, action=None,
                           verbose=None, em=None, **dummy):
    """Search for records co-cited with the record named in the pattern.

    The pattern is expected to be "cocitedwith:<recID>" — the record ID
    is taken from p[12:].  Co-cited records are computed via
    ``calculate_co_cited_with_list``; results are written to ``req`` for
    HTML/XML outputs, or returned for "id"/"intbitset" outputs.
    """
    page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3),
               em=em)
    if of.startswith("h"):
        req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                    p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                    em
                                    ))
    recID = p[12:]
    if record_exists(recID) != 1:
        # record does not exist
        if of.startswith("h"):
            write_warning(_("Requested record does not seem to exist."), req=req)
        if of == "id":
            return []
        elif of == "intbitset":
            return intbitset()
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
    else:
        # record well exists, so find co-cited ones:
        t1 = os.times()[4]
        results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID)))
        if results_cocited_recIDs:
            t2 = os.times()[4]
            cpu_time = t2 - t1
            if of.startswith("h"):
                req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs),
                                            jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                            sc, pl_in_url,
                                            d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
                print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose,
                              sf=sf, so=so, sp=sp, rm=rm, em=em)
            elif of == "id":
                return results_cocited_recIDs
            elif of == "intbitset":
                return intbitset(results_cocited_recIDs)
            elif of.startswith("x"):
                print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose,
                              sf=sf, so=so, sp=sp, rm=rm, em=em)
        else:
            # cited rank_records failed and returned some error message to display:
            if of.startswith("h"):
                write_warning("nothing found", req=req)
            if of == "id":
                return []
            elif of == "intbitset":
                return intbitset()
            elif of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
def prs_search_hosted_collections(kwargs=None, req=None, of=None, ln=None, _=None, p=None,
                                  p1=None, p2=None, p3=None, hosted_colls=None, f=None,
                                  colls_to_search=None, hosted_colls_actual_or_potential_results_p=None,
                                  verbose=None, **dummy):
    """Query any hosted (remote) collections and classify their results.

    Stores the outcome into ``kwargs``: 'hosted_colls_results',
    'hosted_colls_timeouts', 'hosted_colls_true_results' and the three
    derived boolean flags used later when printing the results.
    Returns nothing; all results are passed via ``kwargs``.
    """
    hosted_colls_results = hosted_colls_timeouts = hosted_colls_true_results = None

    # search into the hosted collections only if the output format is html or xml
    if hosted_colls and (of.startswith("h") or of.startswith("x")) and not p.startswith("recid:"):

        # hosted_colls_results : the hosted collections' searches that did not timeout
        # hosted_colls_timeouts : the hosted collections' searches that timed out and will be searched later on again
        (hosted_colls_results, hosted_colls_timeouts) = calculate_hosted_collections_results(req, [p, p1, p2, p3], f, hosted_colls, verbose, ln, CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH)

        # successful searches
        if hosted_colls_results:
            hosted_colls_true_results = []
            for result in hosted_colls_results:
                # if the number of results is None or 0 (or False) then just do nothing
                if result[1] == None or result[1] == False:
                    # these are the searches the returned no or zero results
                    if verbose:
                        write_warning("Hosted collections (perform_search_request): %s returned no results" % result[0][1].name, req=req)
                else:
                    # these are the searches that actually returned results on time
                    hosted_colls_true_results.append(result)
                    if verbose:
                        write_warning("Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2]), req=req)
        else:
            if verbose:
                write_warning("Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time", req=req)
        if hosted_colls_timeouts:
            if verbose:
                for timeout in hosted_colls_timeouts:
                    write_warning("Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name, req=req)

    # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end
    elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")):
        (hosted_colls_results, hosted_colls_timeouts) = (None, None)
    else:
        if verbose:
            write_warning("Hosted collections (perform_search_request): there were no hosted collections to be searched", req=req)

    ## let's define some useful boolean variables:
    # True means there are actual or potential hosted collections results to be printed
    kwargs['hosted_colls_actual_or_potential_results_p'] = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts))

    # True means there are hosted collections timeouts to take care of later
    # (useful for more accurate printing of results later)
    kwargs['hosted_colls_potential_results_p'] = not (not hosted_colls or not hosted_colls_timeouts)

    # True means we only have hosted collections to deal with
    # NOTE(review): this reads the *parameter*, not the value computed
    # two lines above — confirm the caller passes it in, otherwise this
    # flag is always falsy here
    kwargs['only_hosted_colls_actual_or_potential_results_p'] = not colls_to_search and hosted_colls_actual_or_potential_results_p

    kwargs['hosted_colls_results'] = hosted_colls_results
    kwargs['hosted_colls_timeouts'] = hosted_colls_timeouts
    kwargs['hosted_colls_true_results'] = hosted_colls_true_results
def prs_advanced_search(results_in_any_collection, kwargs=None, req=None, of=None,
                        cc=None, ln=None, _=None, p=None, p1=None, p2=None, p3=None,
                        f=None, f1=None, m1=None, op1=None, f2=None, m2=None,
                        op2=None, f3=None, m3=None, ap=None, ec=None,
                        selected_external_collections_infos=None, verbose=None,
                        wl=None, em=None, **dummy):
    """Run an advanced search combining up to three patterns: p1 op1 p2 op2 p3.

    Operators are "a" (and/intersect), "o" (or/union), "n" (and not/
    difference).  Matching record IDs are accumulated into
    ``results_in_any_collection`` (mutated in place).  Returns
    ``page_end(...)`` early when p1 yields nothing or on error;
    otherwise returns None and the caller continues the pipeline.
    """
    len_results_p1 = 0
    len_results_p2 = 0
    len_results_p3 = 0
    try:
        results_in_any_collection.union_update(search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl))
        len_results_p1 = len(results_in_any_collection)
        if len_results_p1 == 0:
            # first pattern found nothing: finish the page right away
            if of.startswith("h"):
                perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec,
                                                           verbose, ln, selected_external_collections_infos, em=em)
            elif of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
            return page_end(req, of, ln, em)
        if p2:
            results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl)
            len_results_p2 = len(results_tmp)
            if op1 == "a": # add
                results_in_any_collection.intersection_update(results_tmp)
            elif op1 == "o": # or
                results_in_any_collection.union_update(results_tmp)
            elif op1 == "n": # not
                results_in_any_collection.difference_update(results_tmp)
            else:
                if of.startswith("h"):
                    write_warning("Invalid set operation %s." % cgi.escape(op1), "Error", req=req)

            if len(results_in_any_collection) == 0:
                if of.startswith("h"):
                    if len_results_p2:
                        #each individual query returned results, but the boolean operation did not
                        nearestterms = []
                        nearest_search_args = req.argd.copy()
                        if p1:
                            nearestterms.append((p1, len_results_p1, clean_dictionary(nearest_search_args, ['p2', 'f2', 'm2', 'p3', 'f3', 'm3'])))
                        nearestterms.append((p2, len_results_p2, clean_dictionary(nearest_search_args, ['p1', 'f1', 'm1', 'p3', 'f3', 'm3'])))
                        write_warning(websearch_templates.tmpl_search_no_boolean_hits(ln=ln, nearestterms=nearestterms), req=req)
                    perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                               ln, selected_external_collections_infos, em=em)
                elif of.startswith("x"):
                    # Print empty, but valid XML
                    print_records_prologue(req, of)
                    print_records_epilogue(req, of)
        if p3:
            results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl)
            len_results_p3 = len(results_tmp)
            if op2 == "a": # add
                results_in_any_collection.intersection_update(results_tmp)
            elif op2 == "o": # or
                results_in_any_collection.union_update(results_tmp)
            elif op2 == "n": # not
                results_in_any_collection.difference_update(results_tmp)
            else:
                if of.startswith("h"):
                    write_warning("Invalid set operation %s." % cgi.escape(op2), "Error", req=req)
            if len(results_in_any_collection) == 0 and len_results_p3 and of.startswith("h"):
                #each individual query returned results but the boolean operation did not
                nearestterms = []
                nearest_search_args = req.argd.copy()
                if p1:
                    nearestterms.append((p1, len_results_p1, clean_dictionary(nearest_search_args, ['p2', 'f2', 'm2', 'p3', 'f3', 'm3'])))
                if p2:
                    nearestterms.append((p2, len_results_p2, clean_dictionary(nearest_search_args, ['p1', 'f1', 'm1', 'p3', 'f3', 'm3'])))
                nearestterms.append((p3, len_results_p3, clean_dictionary(nearest_search_args, ['p1', 'f1', 'm1', 'p2', 'f2', 'm2'])))
                write_warning(websearch_templates.tmpl_search_no_boolean_hits(ln=ln, nearestterms=nearestterms), req=req)
    except:
        # broad except kept deliberately: report to admin, show an error
        # box (or empty XML) and finish the page
        register_exception(req=req, alert_admin=True)
        if of.startswith("h"):
            req.write(create_error_box(req, verbose=verbose, ln=ln))
            perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                       ln, selected_external_collections_infos, em=em)
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
        return page_end(req, of, ln, em)
def prs_simple_search(results_in_any_collection, kwargs=None, req=None, of=None, cc=None, ln=None, p=None, f=None,
                      p1=None, p2=None, p3=None, ec=None, verbose=None, selected_external_collections_infos=None,
                      only_hosted_colls_actual_or_potential_results_p=None, query_representation_in_cache=None,
                      ap=None, hosted_colls_actual_or_potential_results_p=None, wl=None, em=None,
                      **dummy):
    """Run a simple (non-advanced) search, reusing cached results if any.

    Matching record IDs are accumulated into
    ``results_in_any_collection`` (mutated in place).  On failure an
    error box is rendered and ``page_end(...)`` is returned; otherwise
    returns None.
    """
    try:
        # try to load a previously cached hitset for this exact query
        results_in_cache = intbitset().fastload(
            search_results_cache.get(query_representation_in_cache))
    except:
        results_in_cache = None

    if results_in_cache is not None:
        # query was found in the cache, so reuse the cached results:
        results_in_any_collection.union_update(results_in_cache)
        if verbose and of.startswith("h"):
            write_warning("Search stage 0: query found in cache, reusing cached results.", req=req)
    else:
        try:
            # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection"
            # recommendations when there are results only in the hosted collections. Also added the if clause to avoid
            # searching in case we know we only have actual or potential hosted collections results
            if not only_hosted_colls_actual_or_potential_results_p:
                results_in_any_collection.union_update(search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln,
                                                                                    display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p,
                                                                                    wl=wl))
        except:
            register_exception(req=req, alert_admin=True)
            if of.startswith("h"):
                req.write(create_error_box(req, verbose=verbose, ln=ln))
                perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                           ln, selected_external_collections_infos, em=em)
            return page_end(req, of, ln, em)
def prs_intersect_results_with_collrecs(results_final, results_in_any_collection, kwargs=None, colls_to_search=None,
                                        req=None, ap=None, of=None, ln=None,
                                        cc=None, p=None, p1=None, p2=None, p3=None, f=None,
                                        ec=None, verbose=None, selected_external_collections_infos=None, em=None,
                                        **dummy):
    """Intersect the global hitset with each searched collection's records.

    Fills ``results_final`` (dict: collection name -> hitset) in place.
    On error an error box is rendered and ``page_end(...)`` is
    returned; otherwise returns None.
    """
    # suppress the "Nearest terms" box when hosted collections may still
    # deliver results of their own
    display_nearest_terms_box=not kwargs['hosted_colls_actual_or_potential_results_p']
    try:
        # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection"
        # recommendations when there results only in the hosted collections. Also added the if clause to avoid
        # searching in case we know since the last stage that we have no results in any collection
        if len(results_in_any_collection) != 0:
            results_final.update(intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of,
                                                                 verbose, ln, display_nearest_terms_box=display_nearest_terms_box))
    except:
        register_exception(req=req, alert_admin=True)
        if of.startswith("h"):
            req.write(create_error_box(req, verbose=verbose, ln=ln))
            perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                       ln, selected_external_collections_infos, em=em)
        return page_end(req, of, ln, em)
def prs_store_results_in_cache(query_representation_in_cache, results_in_any_collection, req=None, verbose=None, of=None, **dummy):
    """Store intermediate search results in the search results cache.

    Caches the dumped hitset under ``query_representation_in_cache``,
    plus the originating collection name (key suffix '::cc') and the raw
    search pattern (key suffix '::p'), all with the configured cache
    timeout.  No-op when the cache size is configured to 0.
    """
    if CFG_WEBSEARCH_SEARCH_CACHE_SIZE > 0:
        search_results_cache.set(query_representation_in_cache,
                                 results_in_any_collection.fastdump(),
                                 timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT)
        search_results_cache.set(query_representation_in_cache + '::cc',
                                 dummy.get('cc', CFG_SITE_NAME),
                                 timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT)
        if req:
            # under the web application, read the pattern from the
            # current Flask request rather than the legacy req object
            from flask import request
            req = request
            search_results_cache.set(query_representation_in_cache + '::p',
                                     req.values.get('p', ''),
                                     timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT)
        if verbose and of.startswith("h"):
            # BUGFIX: the message is the first positional argument of
            # write_warning (cf. the other call sites); previously `req`
            # itself was passed as the message by mistake.
            write_warning("Search stage 3: storing query results in cache.", req=req)
def prs_apply_search_limits(results_final, kwargs=None, req=None, of=None, cc=None, ln=None, _=None,
                            p=None, p1=None, p2=None, p3=None, f=None, pl=None, ap=None, dt=None,
                            ec=None, selected_external_collections_infos=None,
                            hosted_colls_actual_or_potential_results_p=None,
                            datetext1=None, datetext2=None, verbose=None, wl=None, em=None,
                            **dummy):
    """Apply creation/modification-date and pattern limits to the results.

    Search stage 5: intersects ``results_final`` with the date-range
    hitset (datetext1..datetext2, date type ``dt``) and with the extra
    pattern limit ``pl`` deduced from the URL.  Returns
    ``page_end(...)`` early on error or when nothing survives the
    limits (unless hosted collections may still deliver results).

    NOTE(review): ``results_final`` is rebound locally to the value
    returned by intersect_results_with_hitset — verify that helper
    mutates the dict in place (or that the caller re-reads it),
    otherwise the applied limits would be lost outside this function.
    """
    if datetext1 != "" and results_final != {}:
        if verbose and of.startswith("h"):
            write_warning("Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2), req=req)
        try:
            results_final = intersect_results_with_hitset(req,
                                                          results_final,
                                                          search_unit_in_bibrec(datetext1, datetext2, dt),
                                                          ap,
                                                          aptext= _("No match within your time limits, "
                                                                    "discarding this condition..."),
                                                          of=of)
        except:
            register_exception(req=req, alert_admin=True)
            if of.startswith("h"):
                req.write(create_error_box(req, verbose=verbose, ln=ln))
                perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                           ln, selected_external_collections_infos, em=em)
            return page_end(req, of, ln, em)
        if results_final == {} and not hosted_colls_actual_or_potential_results_p:
            if of.startswith("h"):
                perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                           ln, selected_external_collections_infos, em=em)
            #if of.startswith("x"):
            #    # Print empty, but valid XML
            #    print_records_prologue(req, of)
            #    print_records_epilogue(req, of)
            return page_end(req, of, ln, em)

    if pl and results_final != {}:
        pl = wash_pattern(pl)
        if verbose and of.startswith("h"):
            write_warning("Search stage 5: applying search pattern limit %s..." % cgi.escape(pl), req=req)
        try:
            results_final = intersect_results_with_hitset(req,
                                                          results_final,
                                                          search_pattern_parenthesised(req, pl, ap=0, ln=ln, wl=wl),
                                                          ap,
                                                          aptext=_("No match within your search limits, "
                                                                   "discarding this condition..."),
                                                          of=of)
        except:
            register_exception(req=req, alert_admin=True)
            if of.startswith("h"):
                req.write(create_error_box(req, verbose=verbose, ln=ln))
                perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                           ln, selected_external_collections_infos, em=em)
            return page_end(req, of, ln, em)
        if results_final == {} and not hosted_colls_actual_or_potential_results_p:
            if of.startswith("h"):
                perform_external_collection_search_with_em(req, cc, [p, p1, p2, p3], f, ec, verbose,
                                                           ln, selected_external_collections_infos, em=em)
            if of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
            return page_end(req, of, ln, em)
def prs_split_into_collections(kwargs=None, results_final=None, colls_to_search=None, hosted_colls_results=None,
                               cpu_time=0, results_final_nb_total=None, hosted_colls_actual_or_potential_results_p=None,
                               hosted_colls_true_results=None, hosted_colls_timeouts=None, **dummy):
    """Compute per-collection hit counts and the union of all hits.

    Also merges in hosted-collection counts (timeouts marked with the
    sentinel -963) and accumulates their CPU time.  The computed values
    are stored back into ``kwargs`` ('results_final_nb',
    'results_final_nb_total', 'results_final_for_all_selected_colls',
    'cpu_time').

    @return: tuple (results_final_nb, results_final_nb_total,
        results_final_for_all_selected_colls).
    """
    results_final_nb_total = 0
    # number of records found in each collection
    # (in simple dict to display overview more easily)
    results_final_nb = {}
    for coll in results_final.keys():
        results_final_nb[coll] = len(results_final[coll])

    # Now let us calculate results_final_nb_total more precisely, in
    # order to get the total number of "distinct" hits across searched
    # collections; this is useful because a record might have been
    # attributed to more than one primary collection, so we have to
    # avoid counting it multiple times.  The price to pay for this
    # accuracy is somewhat increased CPU time.
    if len(results_final) == 1:
        # only one collection; no need to union them
        # BUGFIX: was `results_final.keys() == 1` (a list compared to an
        # int, always False), so this fast path was never taken.
        results_final_for_all_selected_colls = list(results_final.values())[0]
        results_final_nb_total = list(results_final_nb.values())[0]
    else:
        # okay, some work ahead to union hits across collections:
        results_final_for_all_selected_colls = intbitset()
        for coll in results_final.keys():
            results_final_for_all_selected_colls.union_update(results_final[coll])
        results_final_nb_total = len(results_final_for_all_selected_colls)

    #if hosted_colls and (of.startswith("h") or of.startswith("x")):
    if hosted_colls_actual_or_potential_results_p:
        if hosted_colls_results:
            for result in hosted_colls_true_results:
                colls_to_search.append(result[0][1].name)
                results_final_nb[result[0][1].name] = result[1]
                results_final_nb_total += result[1]
                cpu_time += result[2]
        if hosted_colls_timeouts:
            for timeout in hosted_colls_timeouts:
                colls_to_search.append(timeout[1].name)
                # use -963 as a special number to identify the collections that timed out
                results_final_nb[timeout[1].name] = -963

    kwargs['results_final_nb'] = results_final_nb
    kwargs['results_final_nb_total'] = results_final_nb_total
    kwargs['results_final_for_all_selected_colls'] = results_final_for_all_selected_colls
    kwargs['cpu_time'] = cpu_time #rca TODO: check where the cpu_time is used, this line was missing
    return (results_final_nb, results_final_nb_total, results_final_for_all_selected_colls)
def prs_summarize_records(kwargs=None, req=None, p=None, f=None, aas=None,
                          p1=None, p2=None, p3=None, f1=None, f2=None, f3=None, op1=None, op2=None,
                          ln=None, results_final_for_all_selected_colls=None, of='hcs', **dummy):
    """Produce a citation summary ('hcs' output) of the search results.

    When no simple pattern ``p`` is given, rebuilds a single textual
    query from the advanced-search triples (p1/f1/op1, ...) before
    handing the final hitset to the summarizer.
    """
    # feed the current search to be summarized:
    from invenio.search_engine_summarizer import summarize_records
    search_p = p
    search_f = f
    if not p and (aas == 1 or p1 or p2 or p3):
        op_d = {'n': ' and not ', 'a': ' and ', 'o': ' or ', '': ''}
        triples = ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, ''])
        triples_len = len(triples)
        for i in range(triples_len):
            fi, pi, oi = triples[i]                       # e.g.:
            # NOTE(review): the next assignment mutates the triple rows
            # in place — assumes ziplist returns lists, not tuples
            if i < triples_len-1 and not triples[i+1][1]: # if p2 empty
                triples[i+1][0] = ''                      #   f2 must be too
                oi = ''                                   #   and o1
            if ' ' in pi:
                # quote multi-word patterns
                pi = '"'+pi+'"'
            if fi:
                fi = fi + ':'
            search_p += fi + pi + op_d[oi]
        search_f = ''
    summarize_records(results_final_for_all_selected_colls, of, ln, search_p, search_f, req)
def prs_print_records(kwargs=None, results_final=None, req=None, of=None, cc=None, pl_in_url=None,
                      ln=None, _=None, p=None, p1=None, p2=None, p3=None, f=None, rg=None, sf=None,
                      so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                      f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, d1y=None, d1m=None,
                      d1d=None, d2y=None, d2m=None, d2d=None, dt=None, jrec=None, colls_to_search=None,
                      hosted_colls_actual_or_potential_results_p=None, hosted_colls_results=None,
                      hosted_colls_true_results=None, hosted_colls_timeouts=None, results_final_nb=None,
                      cpu_time=None, verbose=None, em=None, **dummy):
    """Stage of perform_request_search: write the search results to ``req``,
    collection by collection, optionally ranking (``rm``) or sorting (``sf``)
    each collection's hits first, then append results coming from hosted
    (external) collections, re-polling those that timed out earlier.
    """
    if len(colls_to_search)>1:
        cpu_time = -1 # we do not want to have search time printed on each collection
    print_records_prologue(req, of, cc=cc)
    results_final_colls = []
    wlqh_results_overlimit = 0
    for coll in colls_to_search:
        if results_final.has_key(coll) and len(results_final[coll]):
            if of.startswith("h"):
                # opening search-info bar for this collection (HTML formats only)
                req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll],
                                            jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                            sc, pl_in_url,
                                            d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
            results_final_recIDs = list(results_final[coll])
            results_final_relevances = []
            results_final_relevances_prologue = ""
            results_final_relevances_epilogue = ""
            if rm: # do we have to rank?
                results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \
                    rank_records(req, rm, 0, results_final[coll],
                                 string.split(p) + string.split(p1) +
                                 string.split(p2) + string.split(p3), verbose, so, of, ln, rg, jrec, kwargs['f'])
                if of.startswith("h"):
                    write_warning(results_final_comments, req=req)
                if results_final_recIDs_ranked:
                    results_final_recIDs = results_final_recIDs_ranked
                else:
                    # rank_records failed and returned some error message to display:
                    write_warning(results_final_relevances_prologue, req=req)
                    write_warning(results_final_relevances_epilogue, req=req)
            elif sf or (CFG_BIBSORT_BUCKETS and sorting_methods): # do we have to sort?
                results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of, ln, rg, jrec)
            # remember this collection's hits for the "last query" session cache,
            # unless the list is too large to store
            if len(results_final_recIDs) < CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT:
                results_final_colls.append(results_final_recIDs)
            else:
                wlqh_results_overlimit = 1
            print_records(req, results_final_recIDs, jrec, rg, of, ot, ln,
                          results_final_relevances,
                          results_final_relevances_prologue,
                          results_final_relevances_epilogue,
                          search_pattern=p,
                          print_records_prologue_p=False,
                          print_records_epilogue_p=False,
                          verbose=verbose,
                          sf=sf,
                          so=so,
                          sp=sp,
                          rm=rm,
                          em=em)
            if of.startswith("h"):
                # closing search-info bar (the trailing 1 marks the "bottom" variant)
                req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll],
                                            jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                            sc, pl_in_url,
                                            d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1, em=em))
    if req and not isinstance(req, cStringIO.OutputType):
        # store the last search results page
        session_param_set(req, 'websearch-last-query', req.unparsed_uri)
        if wlqh_results_overlimit:
            results_final_colls = None
        # store list of results if user wants to display hits
        # in a single list, or store list of collections of records
        # if user displays hits split by collections:
        session_param_set(req, 'websearch-last-query-hits', results_final_colls)
    #if hosted_colls and (of.startswith("h") or of.startswith("x")):
    if hosted_colls_actual_or_potential_results_p:
        if hosted_colls_results:
            # TODO: add a verbose message here
            for result in hosted_colls_true_results:
                if of.startswith("h"):
                    req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name],
                                                       jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                       sc, pl_in_url,
                                                       d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, em=em))
                req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg, em=em))
                if of.startswith("h"):
                    req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name],
                                                       jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                       sc, pl_in_url,
                                                       d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
        if hosted_colls_timeouts:
            # TODO: add a verbose message here
            # TODO: check if verbose messages still work when dealing with (re)calculations of timeouts
            # give the hosted collections that timed out a second, longer chance
            (hosted_colls_timeouts_results, hosted_colls_timeouts_timeouts) = do_calculate_hosted_collections_results(req, ln, None, verbose, None, hosted_colls_timeouts, CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH)
            if hosted_colls_timeouts_results:
                for result in hosted_colls_timeouts_results:
                    if result[1] == None or result[1] == False:
                        ## these are the searches the returned no or zero results
                        ## also print a nearest terms box, in case this is the only
                        ## collection being searched and it returns no results?
                        if of.startswith("h"):
                            # -963 is the sentinel meaning "no result count available"
                            req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963,
                                                               jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                               sc, pl_in_url,
                                                               d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                            req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, no_records_found=True, limit=rg, em=em))
                            req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963,
                                                               jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                               sc, pl_in_url,
                                                               d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
                    else:
                        # these are the searches that actually returned results on time
                        if of.startswith("h"):
                            req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1],
                                                               jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                               sc, pl_in_url,
                                                               d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                        req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg, em=em))
                        if of.startswith("h"):
                            req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1],
                                                               jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                               sc, pl_in_url,
                                                               d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
            if hosted_colls_timeouts_timeouts:
                # collections that timed out even on the second attempt
                for timeout in hosted_colls_timeouts_timeouts:
                    if of.startswith("h"):
                        req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963,
                                                           jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                           sc, pl_in_url,
                                                           d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                        req.write(print_hosted_results(url_and_engine=timeout[0], ln=ln, of=of, req=req, search_timed_out=True, limit=rg, em=em))
                        req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963,
                                                           jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2,
                                                           sc, pl_in_url,
                                                           d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1))
    print_records_epilogue(req, of)
    if f == "author" and of.startswith("h"):
        req.write(create_similarly_named_authors_link_box(p, ln))
def prs_log_query(kwargs=None, req=None, uid=None, of=None, ln=None, p=None, f=None,
                  colls_to_search=None, results_final_nb_total=None, em=None, **dummy):
    """Stage of perform_request_search: record the query in the query log
    and, for HTML output formats, flash the email-alert/RSS teaser box.

    The whole web part is wrapped in a broad try/except so that CLI callers
    (where ``req`` is None / flask is unavailable) skip logging silently.
    """
    # FIXME move query logging to signal receiver
    # log query:
    try:
        from flask.ext.login import current_user
        if req:
            from flask import request
            req = request
            # log the raw query string rebuilt from the request values
            id_query = log_query(req.host,
                                 '&'.join(map(lambda (k,v): k+'='+v, request.values.iteritems(multi=True))),
                                 uid)
            #id_query = log_query(req.remote_host, req.args, uid)
            #of = request.values.get('of', 'hb')
            if of.startswith("h") and id_query and (em == '' or EM_REPOSITORY["alert"] in em):
                if not of in ['hcs', 'hcs2']:
                    # display alert/RSS teaser for non-summary formats:
                    display_email_alert_part = True
                    if current_user:
                        if current_user['email'] == 'guest':
                            # guests may not set alerts on locked-down installations
                            if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4:
                                display_email_alert_part = False
                        else:
                            if not current_user['precached_usealerts']:
                                display_email_alert_part = False
                    from flask import flash
                    flash(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, \
                          ln=ln, display_email_alert_part=display_email_alert_part), 'search-results-after')
    except:
        # do not log query if req is None (used by CLI interface)
        pass
    log_query_info("ss", p, f, colls_to_search, results_final_nb_total)
def prs_search_common(kwargs=None, req=None, of=None, cc=None, ln=None, uid=None, _=None, p=None,
                      p1=None, p2=None, p3=None, colls_to_display=None, f=None, rg=None, sf=None,
                      so=None, sp=None, rm=None, ot=None, aas=None, f1=None, m1=None, op1=None,
                      f2=None, m2=None, op2=None, f3=None, m3=None, sc=None, pl=None,
                      d1y=None, d1m=None, d1d=None, d2y=None, d2m=None, d2d=None,
                      dt=None, jrec=None, ec=None, action=None, colls_to_search=None, wash_colls_debug=None,
                      verbose=None, wl=None, em=None, **dummy):
    """Main driver of the perform_request_search pipeline: runs the search
    stages (hosted collections, advanced/simple search, caching, collection
    intersection, limiting) and finally displays the results.

    Intermediate state is passed between the prs_* stages through the shared
    ``kwargs`` dict.  Returns whatever the first stage that produces a final
    output returns, or None.
    """
    query_representation_in_cache = get_search_results_cache_key(**kwargs)
    page_start(req, of, cc, aas, ln, uid, p=create_page_title_search_pattern_info(p, p1, p2, p3), em=em)
    if of.startswith("h") and verbose and wash_colls_debug:
        write_warning("wash_colls debugging info : %s" % wash_colls_debug, req=req)
    # kick off the (asynchronous) hosted-collection searches early
    prs_search_hosted_collections(kwargs=kwargs, **kwargs)
    if of.startswith("h"):
        req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1,
                                    p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action,
                                    em
                                    ))
    # start the CPU-time measurement (os.times()[4] = elapsed real time)
    t1 = os.times()[4]
    results_in_any_collection = intbitset()
    if aas == 1 or (p1 or p2 or p3):
        ## 3A - advanced search
        output = prs_advanced_search(results_in_any_collection, kwargs=kwargs, **kwargs)
        if output is not None:
            return output
    else:
        ## 3B - simple search
        output = prs_simple_search(results_in_any_collection, kwargs=kwargs, **kwargs)
        if output is not None:
            return output
    if len(results_in_any_collection) == 0 and not kwargs['hosted_colls_actual_or_potential_results_p']:
        if of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
        return None
    # store this search query results into search results cache if needed:
    prs_store_results_in_cache(query_representation_in_cache, results_in_any_collection, **kwargs)
    # search stage 4 and 5: intersection with collection universe and sorting/limiting
    try:
        output = prs_intersect_with_colls_and_apply_search_limits(results_in_any_collection, kwargs=kwargs, **kwargs)
        if output is not None:
            return output
    except Exception: # no results to display
        # the stage signals "nothing to display" by raising (see its body)
        return None
    t2 = os.times()[4]
    cpu_time = t2 - t1
    kwargs['cpu_time'] = cpu_time
    ## search stage 6: display results:
    return prs_display_results(kwargs=kwargs, **kwargs)
def prs_intersect_with_colls_and_apply_search_limits(results_in_any_collection,
                                                     kwargs=None, req=None, of=None, ln=None, _=None,
                                                     p=None, p1=None, p2=None, p3=None, f=None, cc=None, ec=None,
                                                     verbose=None, em=None, **dummy):
    """Search stages 4 and 5: intersect the raw hit set with the collection
    universe, then apply search-option limits/restrictions.

    Stores the per-collection result dict in ``kwargs['results_final']``.
    NOTE: raises a bare Exception as a control-flow signal meaning "no
    results to display"; the caller (prs_search_common) catches it.
    """
    # search stage 4: intersection with collection universe:
    results_final = {}
    output = prs_intersect_results_with_collrecs(results_final, results_in_any_collection, kwargs, **kwargs)
    if output is not None:
        return output
    # another external search if we still don't have something
    if results_final == {} and not kwargs['hosted_colls_actual_or_potential_results_p']:
        if of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
        kwargs['results_final'] = results_final
        # deliberate control-flow signal: "no results" (caught upstream)
        raise Exception
    # search stage 5: apply search option limits and restrictions:
    output = prs_apply_search_limits(results_final, kwargs=kwargs, **kwargs)
    kwargs['results_final'] = results_final
    if output is not None:
        return output
def prs_display_results(kwargs=None, results_final=None, req=None, of=None, sf=None,
                        so=None, sp=None, verbose=None, p=None, p1=None, p2=None, p3=None,
                        cc=None, ln=None, _=None, ec=None, colls_to_search=None, rm=None, cpu_time=None,
                        f=None, em=None, **dummy
                        ):
    """Search stage 6: display (or return) the final results.

    Depending on the output format ``of``: returns the raw intbitset
    ('intbitset'), a ranked/sorted list of recIDs ('id'), or writes the
    formatted output to ``req`` (HTML/XML/summary formats).
    """
    ## search stage 6: display results:
    # split result set into collections
    (results_final_nb, results_final_nb_total, results_final_for_all_selected_colls) = prs_split_into_collections(kwargs=kwargs, **kwargs)
    # we continue past this point only if there is a hosted collection that has timed out and might offer potential results
    if results_final_nb_total == 0 and not kwargs['hosted_colls_potential_results_p']:
        if of.startswith("h"):
            write_warning("No match found, please enter different search terms.", req=req)
        elif of.startswith("x"):
            # Print empty, but valid XML
            print_records_prologue(req, of)
            print_records_epilogue(req, of)
    else:
        prs_log_query(kwargs=kwargs, **kwargs)
        # yes, some hits found: good!
        # collection list may have changed due to not-exact-match-found policy so check it out:
        for coll in results_final.keys():
            if coll not in colls_to_search:
                colls_to_search.append(coll)
        # print results overview:
        if of == "intbitset":
            #return the result as an intbitset
            return results_final_for_all_selected_colls
        elif of == "id":
            # we have been asked to return list of recIDs
            recIDs = list(results_final_for_all_selected_colls)
            if rm: # do we have to rank?
                results_final_for_all_colls_rank_records_output = rank_records(req, rm, 0, results_final_for_all_selected_colls,
                                                                               string.split(p) + string.split(p1) +
                                                                               string.split(p2) + string.split(p3), verbose, so, of, ln, kwargs['rg'], kwargs['jrec'], kwargs['f'])
                if results_final_for_all_colls_rank_records_output[0]:
                    recIDs = results_final_for_all_colls_rank_records_output[0]
            elif sf or (CFG_BIBSORT_BUCKETS and sorting_methods): # do we have to sort?
                recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of, ln)
            return recIDs
        elif of.startswith("h"):
            if of not in ['hcs', 'hcs2']:
                # added the hosted_colls_potential_results_p parameter to help print out the overview more accurately
                req.write(print_results_overview(colls_to_search, results_final_nb_total, results_final_nb, cpu_time,
                                                 ln, ec, hosted_colls_potential_results_p=kwargs['hosted_colls_potential_results_p'], em=em))
                kwargs['selected_external_collections_infos'] = print_external_results_overview(req, cc, [p, p1, p2, p3],
                                                                                                f, ec, verbose, ln, print_overview=em == "" or EM_REPOSITORY["overview"] in em)
        # print number of hits found for XML outputs:
        if of.startswith("x") or of == 'mobb':
            req.write("<!-- Search-Engine-Total-Number-Of-Results: %s -->\n" % kwargs['results_final_nb_total'])
        # print records:
        if of in ['hcs', 'hcs2']:
            prs_summarize_records(kwargs=kwargs, **kwargs)
        else:
            prs_print_records(kwargs=kwargs, **kwargs)
# this is a copy of the prs_display_results with output parts removed, needed for external modules
def prs_rank_results(kwargs=None, results_final=None, req=None, colls_to_search=None,
                     sf=None, so=None, sp=None, of=None, rm=None, p=None, p1=None, p2=None, p3=None,
                     verbose=None, **dummy
                     ):
    """Like prs_display_results but without any output: always returns the
    final list of recIDs, ranked (``rm``) or sorted (``sf``) if requested.
    """
    ## search stage 6: display results:
    # split result set into collections
    (results_final_nb, results_final_nb_total, results_final_for_all_selected_colls) = prs_split_into_collections(kwargs=kwargs, **kwargs)
    # yes, some hits found: good!
    # collection list may have changed due to not-exact-match-found policy so check it out:
    for coll in results_final.keys():
        if coll not in colls_to_search:
            colls_to_search.append(coll)
    # we have been asked to return list of recIDs
    recIDs = list(results_final_for_all_selected_colls)
    if rm: # do we have to rank?
        results_final_for_all_colls_rank_records_output = rank_records(req, rm, 0, results_final_for_all_selected_colls,
                                                                       string.split(p) + string.split(p1) +
                                                                       string.split(p2) + string.split(p3), verbose, so, of, field=kwargs['f'])
        if results_final_for_all_colls_rank_records_output[0]:
            recIDs = results_final_for_all_colls_rank_records_output[0]
    elif sf or (CFG_BIBSORT_BUCKETS and sorting_methods): # do we have to sort?
        recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of)
    return recIDs
def perform_request_cache(req, action="show"):
    """Manipulates the search engine cache.

    Writes an HTML dump of the collection reclist cache and of the field
    and collection I18N-name caches to ``req``.  (The ``action`` parameter
    is currently unused in this body.)
    """
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    out = ""
    out += "<h1>Search Cache</h1>"
    req.write(out)
    # show collection reclist cache:
    out = "<h3>Collection reclist cache</h3>"
    out += "- collection table last updated: %s" % get_table_update_time('collection')
    out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache.timestamp
    out += "<br />- reclist cache contents:"
    out += "<blockquote>"
    for coll in collection_reclist_cache.cache.keys():
        if collection_reclist_cache.cache[coll]:
            # collection name and number of cached records
            out += "%s (%d)<br />" % (coll, len(collection_reclist_cache.cache[coll]))
    out += "</blockquote>"
    req.write(out)
    # show field i18nname cache:
    out = "<h3>Field I18N names cache</h3>"
    out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
    out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for field in field_i18nname_cache.cache.keys():
        for ln in field_i18nname_cache.cache[field].keys():
            out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache.cache[field][ln])
    out += "</blockquote>"
    req.write(out)
    # show collection i18nname cache:
    out = "<h3>Collection I18N names cache</h3>"
    out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
    out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for coll in collection_i18nname_cache.cache.keys():
        for ln in collection_i18nname_cache.cache[coll].keys():
            out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache.cache[coll][ln])
    out += "</blockquote>"
    req.write(out)
    req.write("</html>")
    return "\n"
def perform_request_log(req, date=""):
    """Display search log information for given date.

    With ``date`` (YYYYMMDD) given, lists every logged query of that day;
    otherwise shows per-day query counts for the current month.  Reads
    the log via an external ``grep`` on CFG_LOGDIR/search.log.
    """
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    req.write("<h1>Search Log</h1>")
    if date: # case A: display stats for a day
        # string.atoi raises for a non-numeric date, so the %d below is safe
        yyyymmdd = string.atoi(date)
        req.write("<p><big><strong>Date: %d</strong></big><p>" % yyyymmdd)
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
        # read file:
        p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
        lines = p.readlines()
        p.close()
        # process lines:
        i = 0
        for line in lines:
            try:
                # log line format: datetime#aas#pattern#field#collection#nbhits
                datetime, dummy_aas, p, f, c, nbhits = string.split(line,"#")
                i += 1
                req.write("<tr><td align=\"right\">#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                          % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
            except:
                pass # ignore eventual wrong log lines
        req.write("</table>")
    else: # case B: display summary stats per day
        yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
        yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></tr>" % ("Day", "Number of Queries"))
        for day in range(yyyymm01, yyyymmdd + 1):
            # count the queries of that day with grep -c
            p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
            for line in p.readlines():
                req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                          (day, CFG_SITE_URL, day, line))
            p.close()
        req.write("</table>")
    req.write("</html>")
    return "\n"
def get_all_field_values(tag):
    """
    Return every distinct value stored for a given tag.
    @param tag: the full tag, e.g. 909C0b
    @type tag: string
    @return: the list of values
    @rtype: list of strings
    """
    # values of a tag live in table bibNNx, NN being the tag's first two digits
    table = 'bib%02dx' % int(tag[:2])
    values = []
    for row in run_sql("SELECT DISTINCT(value) FROM %s WHERE tag=%%s" % table, (tag, )):
        values.append(row[0])
    return values
def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True, split_by=0):
    """
    Analyze RECIDS and look for TAGS and return most popular values
    and the frequency with which they occur sorted according to
    descending frequency.

    If a value is found in EXCLUDE_VALUES, then do not count it.

    If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
    of value in the tags.  If False, then we count the value only once
    regardless of the number of times it may appear in a record.
    (But, if the same value occurs in another record, we count it, of
    course.)

    @return: list of tuples containing tag and its frequency

    Example:
     >>> get_most_popular_field_values(range(11,20), '980__a')
     [('PREPRINT', 10), ('THESIS', 7), ...]
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
     [('Ellis, J', 10), ('Ellis, N', 7), ...]
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J'))
     [('Ellis, N', 7), ...]
    """
    def _get_most_popular_field_values_helper_sorter(val1, val2):
        """Compare VAL1 and VAL2 according to, firstly, frequency, then
        secondly, alphabetically."""
        # frequencies reversed so that higher counts sort first
        compared_via_frequencies = cmp(valuefreqdict[val2],
                                       valuefreqdict[val1])
        if compared_via_frequencies == 0:
            return cmp(val1.lower(), val2.lower())
        else:
            return compared_via_frequencies
    valuefreqdict = {}
    ## sanity check:
    if not exclude_values:
        exclude_values = []
    if isinstance(tags, str):
        tags = (tags,)
    ## find values to count:
    vals_to_count = []
    displaytmp = {}
    if count_repetitive_values:
        # counting technique A: can look up many records at once: (very fast)
        for tag in tags:
            vals_to_count.extend(get_fieldvalues(recids, tag, sort=False,
                                                 split_by=split_by))
    else:
        # counting technique B: must count record-by-record: (slow)
        for recid in recids:
            vals_in_rec = []
            for tag in tags:
                for val in get_fieldvalues(recid, tag, False):
                    vals_in_rec.append(val)
            # do not count repetitive values within this record
            # (even across various tags, so need to unify again):
            dtmp = {}
            for val in vals_in_rec:
                dtmp[val.lower()] = 1
                # remember the original spelling for display purposes
                displaytmp[val.lower()] = val
            vals_in_rec = dtmp.keys()
            vals_to_count.extend(vals_in_rec)
    ## are we to exclude some of found values?
    for val in vals_to_count:
        if val not in exclude_values:
            if val in valuefreqdict:
                valuefreqdict[val] += 1
            else:
                valuefreqdict[val] = 1
    ## sort by descending frequency of values:
    if not CFG_NUMPY_IMPORTABLE:
        ## original version
        out = []
        vals = valuefreqdict.keys()
        vals.sort(_get_most_popular_field_values_helper_sorter)
        for val in vals:
            tmpdisplv = ''
            if val in displaytmp:
                tmpdisplv = displaytmp[val]
            else:
                tmpdisplv = val
            out.append((tmpdisplv, valuefreqdict[val]))
        return out
    else:
        # numpy-accelerated version of the same sort
        f = []   # frequencies
        n = []   # original names
        ln = []  # lowercased names
        ## build lists within one iteration
        for (val, freq) in valuefreqdict.iteritems():
            f.append(-1 * freq)  # negated so ascending lexsort yields descending frequency
            if val in displaytmp:
                n.append(displaytmp[val])
            else:
                n.append(val)
            ln.append(val.lower())
        ## sort by frequency (desc) and then by lowercased name.
        return [(n[i], -1 * f[i]) for i in numpy.lexsort([ln, f])]
def profile(p="", f="", c=CFG_SITE_NAME):
    """Profile search time.

    Runs perform_request_search(p=..., f=..., c=...) under the profiler,
    saves the raw statistics into the file "perform_request_search_profile"
    in the current directory, and prints them sorted by cumulative time.

    @param p: search pattern
    @param f: search field
    @param c: collection name (defaults to CFG_SITE_NAME)
    @return: 0
    """
    import profile as profiler  # local import; name would otherwise clash with this function
    import pstats
    # Pass the arguments through a namespace (runctx) instead of interpolating
    # them into the profiled source string: a value containing a quote or a
    # backslash would otherwise break the statement or execute arbitrary code.
    profiler.runctx("perform_request_search(p=p, f=f, c=c)",
                    globals(),
                    {'p': p, 'f': f, 'c': c,
                     'perform_request_search': perform_request_search},
                    "perform_request_search_profile")
    stats = pstats.Stats("perform_request_search_profile")
    stats.strip_dirs().sort_stats("cumulative").print_stats()
    return 0
def perform_external_collection_search_with_em(req, current_collection, pattern_list, field,
                                               external_collection, verbosity_level=0, lang=CFG_SITE_LANG,
                                               selected_external_collections_infos=None, em=""):
    """Run perform_external_collection_search, enabling each output part
    according to the ``em`` selector: a part is printed when ``em`` is
    empty or when the part's EM_REPOSITORY token appears in ``em``."""
    def _part_enabled(part):
        # an empty selector means "print everything"
        return em == "" or EM_REPOSITORY[part] in em
    perform_external_collection_search(req, current_collection, pattern_list, field,
                                       external_collection, verbosity_level, lang,
                                       selected_external_collections_infos,
                                       print_overview=_part_enabled("overview"),
                                       print_search_info=_part_enabled("search_info"),
                                       print_see_also_box=_part_enabled("see_also_box"),
                                       print_body=_part_enabled("body"))
diff --git a/invenio/legacy/webalert/alert_engine.py b/invenio/legacy/webalert/alert_engine.py
index ad963c4d1..73c68f69a 100644
--- a/invenio/legacy/webalert/alert_engine.py
+++ b/invenio/legacy/webalert/alert_engine.py
@@ -1,581 +1,581 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Alert engine implementation."""
## rest of the Python code goes below
__revision__ = "$Id$"
from cgi import parse_qs
from re import search, sub
from time import strftime
import datetime
from invenio.config import \
CFG_LOGDIR, \
CFG_SITE_ADMIN_EMAIL, \
CFG_SITE_URL, \
CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES, \
CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES, \
CFG_SITE_NAME, \
CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL
from invenio.webbasket_dblayer import get_basket_owner_id, add_to_basket
from invenio.webbasket import format_external_records
from invenio.search_engine import perform_request_search, wash_colls, \
get_coll_sons, is_hosted_collection, get_coll_normalised_name, \
check_user_can_view_record
from invenio.webinterface_handler import wash_urlargd
from invenio.dbquery import run_sql
from invenio.webuser import get_email, collect_user_info
from invenio.ext.email import send_email
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.alert_engine_config import CFG_WEBALERT_DEBUG_LEVEL
from invenio.websearch_external_collections_config import \
CFG_EXTERNAL_COLLECTION_TIMEOUT, \
CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS
from invenio.websearch_external_collections_getter import HTTPAsyncPageGetter, async_download
from invenio.websearch_external_collections_utils import get_collection_id
import invenio.template
websearch_templates = invenio.template.load('websearch')
webalert_templates = invenio.template.load('webalert')
def update_date_lastrun(alert):
    """Update the last time this alert was ran in the database."""
    # alert rows start with (id_user, id_query, id_basket, ...)
    user_id = alert[0]
    query_id = alert[1]
    basket_id = alert[2]
    today = strftime("%Y-%m-%d")
    return run_sql('update user_query_basket set date_lastrun=%s where id_user=%s and id_query=%s and id_basket=%s;',
                   (today, user_id, query_id, basket_id,))
def get_alert_queries(frequency):
    """Return all the queries for the given frequency.

    Restricted to queries attached to alerts whose date_lastrun is not in
    the future; returns (id, urlargs) rows.
    """
    return run_sql('select distinct id, urlargs from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.frequency=%s and uqb.date_lastrun <= now();', (frequency,))
def get_alert_queries_for_user(uid):
    """Returns all the queries for the given user id.

    Restricted to queries attached to alerts whose date_lastrun is not in
    the future; returns (id, urlargs, frequency) rows.
    """
    return run_sql('select distinct id, urlargs, uqb.frequency from query q, user_query_basket uqb where q.id=uqb.id_query and uqb.id_user=%s and uqb.date_lastrun <= now();', (uid,))
def get_alerts(query, frequency):
    """Returns a dictionary of all the records found for a specific query and frequency along with other information."""
    r = run_sql('select id_user, id_query, id_basket, frequency, date_lastrun, alert_name, notification, alert_desc, alert_recipient from user_query_basket where id_query=%s and frequency=%s;', (query['id_query'], frequency,))
    # 'alerts' holds the raw alert rows; the rest is copied from the query dict
    return {'alerts': r, 'records': query['records'], 'argstr': query['argstr'], 'date_from': query['date_from'], 'date_until': query['date_until']}
def add_records_to_basket(records, basket_id):
    """Add the given records to the given baskets

    ``records`` is a pair (list_of_local_recids, external_results); local
    records the basket owner may not view are filtered out, and the total
    number of records added (local + external) is capped at
    CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL.
    """
    index = 0
    owner_uid = get_basket_owner_id(basket_id)
    # We check that the owner of the recipient basket would be allowed
    # to view the records. This does not apply to external records
    # (hosted collections).
    user_info = collect_user_info(owner_uid)
    filtered_records = ([], records[1])
    filtered_out_recids = [] # only set in debug mode
    for recid in records[0]:
        (auth_code, auth_msg) = check_user_can_view_record(user_info, recid)
        if auth_code == 0:
            filtered_records[0].append(recid)
        elif CFG_WEBALERT_DEBUG_LEVEL > 2:
            # only keep track of this in DEBUG mode
            filtered_out_recids.append(recid)
    nrec = len(filtered_records[0])
    # 'index' tracks how many records we have consumed of the global cap
    index += nrec
    if index > CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL:
        index = CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL
    if nrec > 0:
        # py2 and/or idiom: min(nrec, index)
        nrec_to_add = nrec < index and nrec or index
        if CFG_WEBALERT_DEBUG_LEVEL > 0:
            print "-> adding %i records into basket %s: %s" % (nrec_to_add, basket_id, filtered_records[0][:nrec_to_add])
            if nrec > nrec_to_add:
                print "-> not added %i records into basket %s: %s due to maximum limit restrictions." % (nrec - nrec_to_add, basket_id, filtered_records[0][nrec_to_add:])
        try:
            if CFG_WEBALERT_DEBUG_LEVEL == 0:
                add_to_basket(owner_uid, filtered_records[0][:nrec_to_add], 0, basket_id)
            else:
                print ' NOT ADDED, DEBUG LEVEL > 0'
        except Exception:
            register_exception()
    if CFG_WEBALERT_DEBUG_LEVEL > 2 and filtered_out_recids:
        print "-> these records have been filtered out, as user id %s did not have access:\n%s" % \
              (owner_uid, repr(filtered_out_recids))
    if index < CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL:
        # still room under the cap: add external (hosted-collection) records
        for external_collection_results in filtered_records[1][0]:
            nrec = len(external_collection_results[1][0])
            # index_tmp: the number of maximum allowed records to be added to
            # the basket for the next collection.
            index_tmp = CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL - index
            index += nrec
            if index > CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL:
                index = CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL
            if nrec > 0 and index_tmp > 0:
                nrec_to_add = nrec < index_tmp and nrec or index_tmp
                if CFG_WEBALERT_DEBUG_LEVEL > 0:
                    print "-> adding %s external records (collection \"%s\") into basket %s: %s" % (nrec_to_add, external_collection_results[0], basket_id, external_collection_results[1][0][:nrec_to_add])
                    if nrec > nrec_to_add:
                        print "-> not added %s external records (collection \"%s\") into basket %s: %s due to maximum limit restriction" % (nrec - nrec_to_add, external_collection_results[0], basket_id, external_collection_results[1][0][nrec_to_add:])
                try:
                    if CFG_WEBALERT_DEBUG_LEVEL == 0:
                        collection_id = get_collection_id(external_collection_results[0])
                        added_items = add_to_basket(owner_uid, external_collection_results[1][0][:nrec_to_add], collection_id, basket_id)
                        # fetch/format the newly added external items in MARCXML
                        format_external_records(added_items, of="xm")
                    else:
                        print ' NOT ADDED, DEBUG LEVEL > 0'
                except Exception:
                    register_exception()
            elif nrec > 0 and CFG_WEBALERT_DEBUG_LEVEL > 0:
                print "-> not added %s external records (collection \"%s\") into basket %s: %s due to maximum limit restriction" % (nrec, external_collection_results[0], basket_id, external_collection_results[1][0])
    elif CFG_WEBALERT_DEBUG_LEVEL > 0:
        # cap already reached by local records: report skipped external ones
        for external_collection_results in filtered_records[1][0]:
            nrec = len(external_collection_results[1][0])
            if nrec > 0:
                print "-> not added %i external records (collection \"%s\") into basket %s: %s due to maximum limit restrictions" % (nrec, external_collection_results[0], basket_id, external_collection_results[1][0])
def get_query(alert_id):
    """Return the URL argument string of the query corresponding to this alert id."""
    rows = run_sql('select urlargs from query where id=%s', (alert_id,))
    # single-row result: return the urlargs column of the first row
    return rows[0][0]
def email_notify(alert, records, argstr):
    """Send the notification e-mail for a specific alert.

    alert:   a user_query_basket row; the indices read below are uid (0),
             frequency (3), name (5), description (7) and recipient (8) --
             presumably matching the table's column order (confirm).
    records: (local recids, (external results, external timeouts))
    argstr:  url arguments of the original search query
    """
    if CFG_WEBALERT_DEBUG_LEVEL > 2:
        print "+" * 80 + '\n'
    uid = alert[0]
    user_info = collect_user_info(uid)
    frequency = alert[3]
    alert_name = alert[5]
    alert_description = alert[7]
    alert_recipient_email = alert[8] # set only by admin. Bypasses access-right checks.
    filtered_out_recids = [] # only set in debug mode
    if not alert_recipient_email:
        # Filter out records that user (who setup the alert) should
        # not see. This does not apply to external records (hosted
        # collections).
        filtered_records = ([], records[1])
        for recid in records[0]:
            (auth_code, auth_msg) = check_user_can_view_record(user_info, recid)
            if auth_code == 0:
                filtered_records[0].append(recid)
            elif CFG_WEBALERT_DEBUG_LEVEL > 2:
                # only keep track of this in DEBUG mode
                filtered_out_recids.append(recid)
    else:
        # If admin has decided to send to some mailing-list, we cannot
        # verify that recipients have access to the records. So keep
        # all of them.
        filtered_records = records
    if len(filtered_records[0]) == 0:
        # no local records left: only notify if at least one external
        # (hosted) collection returned something
        total_n_external_records = 0
        for external_collection_results in filtered_records[1][0]:
            total_n_external_records += len(external_collection_results[1][0])
        if total_n_external_records == 0:
            return
    msg = ""
    if CFG_WEBALERT_DEBUG_LEVEL > 2 and filtered_out_recids:
        print "-> these records have been filtered out, as user id %s did not have access:\n%s" % \
              (uid, repr(filtered_out_recids))
    if CFG_WEBALERT_DEBUG_LEVEL > 0:
        msg = "*** THIS MESSAGE WAS SENT IN DEBUG MODE ***\n\n"
    url = CFG_SITE_URL + "/search?" + argstr
    # Extract the pattern, the collection list, the current collection
    # and the sc (split collection) from the formatted query
    query = parse_qs(argstr)
    pattern = query.get('p', [''])[0]
    collection_list = query.get('c', [])
    current_collection = query.get('cc', [''])
    sc = query.get('sc', ['1'])
    collections = calculate_desired_collection_list(collection_list, current_collection, int(sc[0]))
    msg += webalert_templates.tmpl_alert_email_body(alert_name,
                                                    alert_description,
                                                    url,
                                                    filtered_records,
                                                    pattern,
                                                    collections,
                                                    frequency,
                                                    alert_use_basket_p(alert))
    # admin-forced recipient wins over the alert owner's address
    email = alert_recipient_email or get_email(uid)
    if email == 'guest':
        # guest users have no e-mail address: report and skip
        print "********************************************************************************"
        print "The following alert was not send, because cannot detect user email address:"
        print " " + repr(argstr)
        print "********************************************************************************"
        return
    if CFG_WEBALERT_DEBUG_LEVEL > 0:
        print "********************************************************************************"
        print msg
        print "********************************************************************************"
    # debug levels 2 and 3 suppress the actual sending
    if CFG_WEBALERT_DEBUG_LEVEL < 2:
        send_email(fromaddr=webalert_templates.tmpl_alert_email_from(),
                   toaddr=email,
                   subject=webalert_templates.tmpl_alert_email_title(alert_name),
                   content=msg,
                   header='',
                   footer='',
                   attempt_times=CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES,
                   attempt_sleeptime=CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES)
    # debug level 4 additionally sends a copy to the site admin
    if CFG_WEBALERT_DEBUG_LEVEL == 4:
        send_email(fromaddr=webalert_templates.tmpl_alert_email_from(),
                   toaddr=CFG_SITE_ADMIN_EMAIL,
                   subject=webalert_templates.tmpl_alert_email_title(alert_name),
                   content=msg,
                   header='',
                   footer='',
                   attempt_times=CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES,
                   attempt_sleeptime=CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES)
def _date_to_tuple(date):
"""Private function. Converts a date as a tuple of string into a list of integers."""
return [int(part) for part in (date.year, date.month, date.day)]
def get_record_ids(argstr, date_from, date_until):
    """Return the local and external records found for a specific query and timeframe.

    argstr:     url argument string of the original search query
    date_from:  start of the timeframe (a date object)
    date_until: end of the timeframe (a date object)
    Returns a tuple (local recids, (external results, external timeouts)).
    """
    argd = wash_urlargd(parse_qs(argstr), websearch_templates.search_results_default_urlargd)
    p = argd.get('p', [])
    c = argd.get('c', [])
    cc = argd.get('cc', [])
    aas = argd.get('aas', [])
    f = argd.get('f', [])
    so = argd.get('so', [])
    sp = argd.get('sp', [])
    ot = argd.get('ot', [])
    p1 = argd.get('p1', [])
    f1 = argd.get('f1', [])
    m1 = argd.get('m1', [])
    op1 = argd.get('op1', [])
    p2 = argd.get('p2', [])
    f2 = argd.get('f2', [])
    m2 = argd.get('m2', [])
    # BUGFIX: this used to read the non-existent 'op3' argument, so the
    # second boolean operator of an advanced search was always lost.
    op2 = argd.get('op2', [])
    p3 = argd.get('p3', [])
    f3 = argd.get('f3', [])
    m3 = argd.get('m3', [])
    sc = argd.get('sc', [])
    d1y, d1m, d1d = _date_to_tuple(date_from)
    d2y, d2m, d2d = _date_to_tuple(date_until)
    # alerts might contain collections that have been deleted
    # check if such collections are in the query, and if yes, do not include them in the search
    cc = get_coll_normalised_name(cc)
    if not cc and not c: # the alarm was for an entire collection that does not exist anymore
        return ([], ([], []))
    if c: # some collections were defined in the query
        c = [c_norm_name for c_norm_name in [get_coll_normalised_name(c_name) for c_name in c] if c_norm_name] # remove unknown collections from c
        if not c: # none of the collections selected in the alert still exist
            return ([], ([], []))
    washed_colls = wash_colls(cc, c, sc, 0)
    hosted_colls = washed_colls[3]
    if hosted_colls:
        # query the hosted (external) collections as well
        req_args = "p=%s&f=%s&d1d=%s&d1m=%s&d1y=%s&d2d=%s&d2m=%s&d2y=%s&ap=%i" % (p, f, d1d, d1m, d1y, d2d, d2m, d2y, 0)
        external_records = calculate_external_records(req_args, [p, p1, p2, p3], f, hosted_colls, CFG_EXTERNAL_COLLECTION_TIMEOUT, CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS)
    else:
        external_records = ([], [])
    recids = perform_request_search(of='id', p=p, c=c, cc=cc, f=f, so=so, sp=sp, ot=ot,
                                    aas=aas, p1=p1, f1=f1, m1=m1, op1=op1, p2=p2, f2=f2,
                                    m2=m2, op2=op2, p3=p3, f3=f3, m3=m3, sc=sc, d1y=d1y,
                                    d1m=d1m, d1d=d1d, d2y=d2y, d2m=d2m, d2d=d2d)
    return (recids, external_records)
def run_query(query, frequency, date_until):
"""Return a dictionary containing the information of the performed query.
The information contains the id of the query, the arguments as a
string, and the list of found records."""
if frequency == 'day':
date_from = date_until - datetime.timedelta(days=1)
elif frequency == 'week':
date_from = date_until - datetime.timedelta(weeks=1)
else:
# Months are not an explicit notion of timedelta (it's the
# most ambiguous too). So we explicitely take the same day of
# the previous month.
d, m, y = (date_until.day, date_until.month, date_until.year)
m = m - 1
if m == 0:
m = 12
y = y - 1
date_from = datetime.date(year=y, month=m, day=d)
recs = get_record_ids(query[1], date_from, date_until)
n = len(recs[0])
if n:
log('query %08s produced %08s records for all the local collections' % (query[0], n))
for external_collection_results in recs[1][0]:
n = len(external_collection_results[1][0])
if n:
log('query %08s produced %08s records for external collection \"%s\"' % (query[0], n, external_collection_results[0]))
if CFG_WEBALERT_DEBUG_LEVEL > 2:
print "[%s] run query: %s with dates: from=%s, until=%s\n found rec ids: %s" % (
strftime("%c"), query, date_from, date_until, recs)
return {'id_query': query[0], 'argstr': query[1],
'records': recs, 'date_from': date_from, 'date_until': date_until}
def process_alert_queries(frequency, date):
    """Run the alerts according to the frequency.

    Retrieves the queries for which an alert exists, performs each of
    them, and processes the corresponding alerts."""
    for alert_query in get_alert_queries(frequency):
        query_info = run_query(alert_query, frequency, date)
        matching_alerts = get_alerts(query_info, frequency)
        process_alerts(matching_alerts)
def process_alert_queries_for_user(uid, date):
    """Process the alerts for the given user id.

    All alerts are with reference date set as the current local time."""
    for alert_query in get_alert_queries_for_user(uid):
        # the alert's own frequency is stored in column 2
        freq = alert_query[2]
        query_info = run_query(alert_query, freq, date)
        process_alerts(get_alerts(query_info, freq))
def replace_argument(argstr, argname, argval):
    """Replace the given date argument value with the new one.
    If the argument is missing, it is added.

    argstr:  url argument string
    argname: name of the numeric-valued argument to set
    argval:  new value
    Returns the updated argument string.
    """
    # raw strings so that \d is a regex digit class, not an (invalid)
    # string escape sequence
    if search(r'%s=\d+' % argname, argstr):
        r = sub(r'%s=\d+' % argname, '%s=%s' % (argname, argval), argstr)
    else:
        r = argstr + '&%s=%s' % (argname, argval)
    return r
def update_arguments(argstr, date_from, date_until):
    """Replace date arguments in argstr with the ones specified by date_from and date_until.
    Absent arguments are added."""
    d1y, d1m, d1d = _date_to_tuple(date_from)
    d2y, d2m, d2d = _date_to_tuple(date_until)
    result = argstr
    # rewrite the six date components one by one
    for name, value in (('d1y', d1y), ('d1m', d1m), ('d1d', d1d),
                        ('d2y', d2y), ('d2m', d2m), ('d2d', d2d)):
        result = replace_argument(result, name, value)
    return result
def log(msg):
    """Log the given message in the alert engine log (alertengine.log).

    Each entry is prefixed with a YYYYmmddHHMMSS# timestamp.  Any error
    is registered instead of propagated, so logging can never abort an
    alert engine run.
    """
    try:
        logfile = open(CFG_LOGDIR + '/alertengine.log', 'a')
        try:
            logfile.write(strftime('%Y%m%d%H%M%S#'))
            logfile.write(msg + '\n')
        finally:
            # close the handle even when a write fails (the original
            # leaked it in that case)
            logfile.close()
    except Exception:
        register_exception()
def process_alerts(alerts):
    """Process the given alerts and store the records found to the user defined baskets
    and/or notify them by e-mail.

    `alerts' is the dictionary returned by run_query() (keys 'argstr',
    'records', 'date_from', 'date_until') extended with an 'alerts' list
    of per-user alert rows.
    """
    # TBD: do not generate the email each time, forge it once and then
    # send it to all appropriate people
    for a in alerts['alerts']:
        if alert_use_basket_p(a):
            add_records_to_basket(alerts['records'], a[2])
        if alert_use_notification_p(a):
            argstr = update_arguments(alerts['argstr'], alerts['date_from'], alerts['date_until'])
            try:
                email_notify(a, alerts['records'], argstr)
            except Exception:
                # There were troubles sending this alert, so register
                # this exception and continue with other alerts:
                register_exception(alert_admin=True,
                                   prefix="Error when sending alert %s, %s\n." % \
                                   (repr(a), repr(argstr)))
            # Inform the admin when external collections time out
            # NOTE(review): reconstructed indentation keeps this inside the
            # notification branch, since it references `argstr' -- confirm
            # against the upstream layout.
            if len(alerts['records'][1][1]) > 0:
                register_exception(alert_admin=True,
                                   prefix="External collections %s timed out when sending alert %s, %s\n." % \
                                   (", ".join(alerts['records'][1][1]), repr(a), repr(argstr)))
        update_date_lastrun(a)
def alert_use_basket_p(alert):
    """Boolean. Should this alert store the records found in a basket?

    A non-zero basket id (column 2 of the alert row) means yes."""
    basket_id = alert[2]
    return basket_id != 0
def alert_use_notification_p(alert):
    """Boolean. Should this alert send a notification e-mail about the records found?

    The notification flag (column 6 of the alert row) is 'y' when enabled."""
    notification_flag = alert[6]
    return notification_flag == 'y'
def run_alerts(date):
    """Run the alerts.

    Decide which alert frequencies are due at the given reference date
    and process them."""
    # monthly alerts fire on the first day of the month
    if date.day == 1:
        process_alert_queries('month', date)
    # weekly alerts fire on Mondays (isoweekday() == 1)
    if date.isoweekday() == 1:
        process_alert_queries('week', date)
    # daily alerts always fire
    process_alert_queries('day', date)
# External records related functions
def calculate_external_records(req_args, pattern_list, field, hosted_colls, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS):
    """Function that returns the external records found and the potential time outs
    given a search pattern and a list of hosted collections.

    Convenience wrapper: derives the search engines and basic search
    units, then delegates to do_calculate_external_records()."""
    (external_search_engines, basic_search_units) = calculate_external_search_params(pattern_list, field, hosted_colls)
    return do_calculate_external_records(req_args, basic_search_units, external_search_engines, timeout, limit)
def calculate_external_search_params(pattern_list, field, hosted_colls):
    """Function that calculates the basic search units given the search pattern.
    Also returns a set of hosted collections engines.

    Returns a tuple (external search engines, basic search units)."""
    # imports are local to avoid loading the search engine machinery at
    # module import time
    from invenio.search_engine import create_basic_search_units
    from invenio.websearch_external_collections import bind_patterns
    from invenio.websearch_external_collections import select_hosted_search_engines as select_external_search_engines
    pattern = bind_patterns(pattern_list)
    basic_search_units = create_basic_search_units(None, pattern, field)
    external_search_engines = select_external_search_engines(hosted_colls)
    return (external_search_engines, basic_search_units)
def do_calculate_external_records(req_args, basic_search_units, external_search_engines, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS):
    """Function that returns the external records found and the potential time outs
    given the basic search units or the req arguments and a set of hosted collections engines.

    Returns a tuple (list of (engine name, extracted records), list of
    names of engines that timed out).
    """
    # [search url, engine] pairs for engines that produced a valid url
    engines_list = []
    # results that arrived before the timeout: (pagegetter, engines_list entry)
    results_list = []
    # (engine name, parsed records) for every engine that answered
    full_results_list = []
    # names of the engines that timed out
    timeout_list = []
    for engine in external_search_engines:
        url = engine.build_search_url(basic_search_units, req_args, limit=limit)
        if url:
            engines_list.append([url, engine])
    # create the list of search urls to be handed to the asynchronous getter
    pagegetters_list = [HTTPAsyncPageGetter(engine[0]) for engine in engines_list]
    def finished(pagegetter, data, dummy_time):
        """Function called, each time the download of a web page finish.
        Records the pagegetter together with its engines_list entry."""
        # each pagegetter that didn't timeout is added to this list
        results_list.append((pagegetter, data))
    # run the asynchronous getter; download_done_list[i] tells whether
    # pagegetters_list[i] completed before the timeout
    download_done_list = async_download(pagegetters_list, finished, engines_list, timeout)
    # create the complete list of tuples, one for each hosted collection,
    # with the results and other information, including those that timed out.
    # NOTE: the loop variable used to shadow the `finished' callback above;
    # it is renamed here for clarity (behaviour unchanged).
    for (download_done, engine) in zip(download_done_list, engines_list):
        if download_done:
            # find the result belonging to this engine and parse it
            for result in results_list:
                if result[1] == engine:
                    engine[1].parser.parse_and_get_results(result[0].data, feedonly=True)
                    full_results_list.append((engine[1].name, engine[1].parser.parse_and_extract_records(of="xm")))
                    break
        else:
            timeout_list.append(engine[1].name)
    return (full_results_list, timeout_list)
def calculate_desired_collection_list(c, cc, sc):
    """Function that calculates the user desired collection list when sending a webalert e-mail.

    c:  list of collections from the query
    cc: list holding the current collection (falls back to the site name)
    sc: split-collection flag (0 or 1)
    Mirrors the wash_colls() behaviour, ignoring hosted collections."""
    if not cc[0]:
        cc = [CFG_SITE_NAME]
    # regular (non hosted, non restricted) sons of the current collection
    washed_cc_sons = [coll for coll in get_coll_sons(cc[0])
                      if not is_hosted_collection(coll)]
    # c with the hosted collections removed
    washed_c = [coll for coll in c if not is_hosted_collection(coll)]
    # try to simulate the wash_colls function behavior when calculating
    # the collections to return
    if not washed_c and not washed_cc_sons:
        # no collections found: cc has no sons, c not defined
        return cc
    if washed_cc_sons == washed_c:
        if sc == 0:
            return cc
        elif sc == 1:
            return washed_c
    else:
        if sc == 0:
            return washed_c
        elif sc == 1:
            # expand every selected son into its own sons
            expanded = []
            for coll in washed_c:
                if coll in washed_cc_sons:
                    expanded.extend(get_coll_sons(coll))
                else:
                    expanded.append(coll)
            return expanded
diff --git a/invenio/legacy/webalert/api.py b/invenio/legacy/webalert/api.py
index dc331e454..bdf51fb67 100644
--- a/invenio/legacy/webalert/api.py
+++ b/invenio/legacy/webalert/api.py
@@ -1,429 +1,429 @@
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""PERSONAL FEATURES - YOUR ALERTS"""
__revision__ = "$Id$"
import cgi
import time
from invenio.config import CFG_SITE_LANG
from invenio.dbquery import run_sql
from invenio.webuser import isGuestUser
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webaccount import warning_guest_user
from invenio.webbasket import create_personal_baskets_selection_box
from invenio.webbasket_dblayer import check_user_owns_baskets
from invenio.base.i18n import gettext_set_language
from invenio.utils.date import convert_datestruct_to_datetext, convert_datetext_to_dategui
import invenio.template
webalert_templates = invenio.template.load('webalert')
### IMPLEMENTATION
class AlertError(Exception):
    """Exception raised for user-facing alert errors (duplicate alert
    name, missing rights, basket not owned, ...)."""
    pass
def check_alert_name(alert_name, uid, ln=CFG_SITE_LANG):
    """check this user does not have another alert with this name.

    Raises AlertError with a localized message when a duplicate exists."""
    # load the right language
    _ = gettext_set_language(ln)
    sql = """select id_query
             from user_query_basket
             where id_user=%s and alert_name=%s"""
    # the comparison uses the stripped name, matching how alerts are stored
    res = run_sql(sql, (uid, alert_name.strip()))
    if len(res) > 0:
        raise AlertError( _("You already have an alert named %s.") % ('<b>' + cgi.escape(alert_name) + '</b>',) )
def get_textual_query_info_from_urlargs(urlargs, ln=CFG_SITE_LANG):
    """Return nicely formatted search pattern and catalogue from urlargs of the search query.
    Suitable for 'your searches' display.

    (The unused `out' variable and the unreachable trailing `return out'
    of the original were removed.)
    """
    # parse_qs turns the url argument string into a {name: [values]} dict
    args = cgi.parse_qs(urlargs)
    return webalert_templates.tmpl_textual_query_info_from_urlargs(
        ln=ln,
        args=args,
    )
def perform_display(permanent, uid, ln=CFG_SITE_LANG):
    """display the searches performed by the current user
    input: default permanent="n"; permanent="y" display permanent queries(most popular)
    output: list of searches in formatted html
    """
    # load the right language
    _ = gettext_set_language(ln)
    # first detect number of queries:
    nb_queries_total = 0
    nb_queries_distinct = 0
    query = "SELECT COUNT(*),COUNT(DISTINCT(id_query)) FROM user_query WHERE id_user=%s"
    res = run_sql(query, (uid,), 1)
    try:
        nb_queries_total = res[0][0]
        nb_queries_distinct = res[0][1]
    except Exception:
        # best effort: keep the zero defaults when the count cannot be
        # read (was a bare `except:', which also swallowed SystemExit)
        pass
    # query for queries:
    params = ()
    if permanent == "n":
        SQL_query = "SELECT DISTINCT(q.id),q.urlargs "\
                    "FROM query q, user_query uq "\
                    "WHERE uq.id_user=%s "\
                    "AND uq.id_query=q.id "\
                    "ORDER BY q.id DESC"
        params = (uid,)
    else:
        # permanent="y"
        SQL_query = "SELECT q.id,q.urlargs "\
                    "FROM query q "\
                    "WHERE q.type='p'"
    query_result = run_sql(SQL_query, params)
    queries = []
    if len(query_result) > 0:
        for row in query_result:
            if permanent == "n":
                # fetch when the user last ran this particular query
                res = run_sql("SELECT DATE_FORMAT(MAX(date),'%%Y-%%m-%%d %%H:%%i:%%s') FROM user_query WHERE id_user=%s and id_query=%s",
                              (uid, row[0]))
                try:
                    lastrun = res[0][0]
                except Exception:
                    lastrun = _("unknown")
            else:
                lastrun = ""
            queries.append({
                'id': row[0],
                'args': row[1],
                'textargs': get_textual_query_info_from_urlargs(row[1], ln=ln),
                'lastrun': lastrun,
            })
    return webalert_templates.tmpl_display_alerts(
        ln=ln,
        permanent=permanent,
        nb_queries_total=nb_queries_total,
        nb_queries_distinct=nb_queries_distinct,
        queries=queries,
        guest=isGuestUser(uid),
        guesttxt=warning_guest_user(type="alerts", ln=ln)
    )
def check_user_can_add_alert(id_user, id_query):
    """Check if ID_USER has really alert adding rights on ID_QUERY
    (that is, the user made the query herself or the query is one of
    predefined `popular' queries) and return True or False
    accordingly. Useful to filter out malicious users trying to guess
    idq URL parameter values in order to access potentially restricted
    query alerts."""
    # is this a predefined popular query?
    popular = run_sql("""SELECT COUNT(*) FROM query
                    WHERE id=%s AND type='p'""", (id_query,))
    if popular and popular[0][0]:
        return True
    # has the user performed this query in the past?
    performed = run_sql("""SELECT COUNT(*) FROM user_query
                    WHERE id_query=%s AND id_user=%s""", (id_query, id_user))
    if performed and performed[0][0]:
        return True
    return False
def perform_input_alert(action, id_query, alert_name, frequency, notification, id_basket, uid, old_id_basket=None, ln = CFG_SITE_LANG):
    """get the alert settings
    input: action="add" for a new alert (blank form), action="modify" for an update
           (get old values)
           id_query id the identifier of the search to be alerted
           for the "modify" action specify old alert_name, frequency of checking,
           e-mail notification and basket id.
    output: alert settings input form
    Raises AlertError when the user has no rights on this query.
    """
    # load the right language
    _ = gettext_set_language(ln)
    # security check:
    if not check_user_can_add_alert(uid, id_query):
        raise AlertError(_("You do not have rights for this operation."))
    # display query information
    res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,))
    try:
        urlargs = res[0][0]
    except Exception:
        # unknown query id: show a placeholder instead of failing
        # (was a bare `except:', which also swallowed SystemExit)
        urlargs = "UNKNOWN"
    baskets = create_personal_baskets_selection_box(uid=uid,
                                                    html_select_box_name='idb',
                                                    selected_bskid=old_id_basket,
                                                    ln=ln)
    return webalert_templates.tmpl_input_alert(
        ln=ln,
        query=get_textual_query_info_from_urlargs(urlargs, ln=ln),
        action=action,
        frequency=frequency,
        notification=notification,
        alert_name=alert_name,
        baskets=baskets,
        old_id_basket=old_id_basket,
        id_basket=id_basket,
        id_query=id_query,
        guest=isGuestUser(uid),
        guesttxt=warning_guest_user(type="alerts", ln=ln)
    )
def check_alert_is_unique(id_basket, id_query, uid, ln=CFG_SITE_LANG ):
    """check the user does not have another alert for the specified query and basket

    Raises AlertError with a localized message when such an alert exists."""
    _ = gettext_set_language(ln)
    sql = """select id_query
             from user_query_basket
             where id_user = %s and id_query = %s
             and id_basket = %s"""
    res = run_sql(sql, (uid, id_query, id_basket))
    if len(res):
        raise AlertError(_("You already have an alert defined for the specified query and basket."))
def perform_add_alert(alert_name, frequency, notification,
                      id_basket, id_query, uid, ln = CFG_SITE_LANG):
    """add an alert to the database
    input: the name of the new alert;
           alert frequency: 'month', 'week' or 'day';
           setting for e-mail notification: 'y' for yes, 'n' for no;
           basket identifier: 'no' for no basket;
           new basket name for this alert;
           identifier of the query to be alerted
    output: confirmation message + the list of alerts Web page
    Raises AlertError on missing rights, empty/duplicate name or
    foreign basket."""
    # sanity check
    if (None in (alert_name, frequency, notification, id_basket, id_query, uid)):
        return ''
    # load the right language
    _ = gettext_set_language(ln)
    # security check:
    if not check_user_can_add_alert(uid, id_query):
        raise AlertError(_("You do not have rights for this operation."))
    # check the alert name is not empty
    alert_name = alert_name.strip()
    if alert_name == "":
        raise AlertError(_("The alert name cannot be empty."))
    # check if the alert can be created
    check_alert_name(alert_name, uid, ln)
    check_alert_is_unique(id_basket, id_query, uid, ln)
    # id_basket == 0 means "no basket", so ownership only matters otherwise
    if id_basket != 0 and not check_user_owns_baskets(uid, id_basket):
        raise AlertError( _("You are not the owner of this basket.") )
    # add a row to the alerts table: user_query_basket
    query = """INSERT INTO user_query_basket (id_user, id_query, id_basket,
                                              frequency, date_creation, date_lastrun,
                                              alert_name, notification)
               VALUES (%s,%s,%s,%s,%s,'',%s,%s)"""
    params = (uid, id_query, id_basket,
              frequency, convert_datestruct_to_datetext(time.localtime()),
              alert_name, notification)
    run_sql(query, params)
    out = _("The alert %s has been added to your profile.")
    out %= '<b>' + cgi.escape(alert_name) + '</b>'
    out += perform_list_alerts(uid, ln=ln)
    return out
def perform_list_alerts(uid, ln=CFG_SITE_LANG):
    """perform_list_alerts display the list of alerts for the connected user

    Returns the rendered 'list of alerts' page as html."""
    # set variables
    out = ""
    # query the database; LEFT JOINs so that alerts pointing at deleted
    # queries or baskets still come back (with NULL columns)
    query = """ SELECT q.id, q.urlargs,
                       a.id_basket, b.name,
                       a.alert_name, a.frequency,a.notification,
                       DATE_FORMAT(a.date_creation,'%%Y-%%m-%%d %%H:%%i:%%s'),
                       DATE_FORMAT(a.date_lastrun,'%%Y-%%m-%%d %%H:%%i:%%s')
                FROM user_query_basket a LEFT JOIN query q ON a.id_query=q.id
                                         LEFT JOIN bskBASKET b ON a.id_basket=b.id
                WHERE a.id_user=%s
                ORDER BY a.alert_name ASC """
    res = run_sql(query, (uid,))
    alerts = []
    for (qry_id, qry_args,
         bsk_id, bsk_name,
         alrt_name, alrt_frequency, alrt_notification, alrt_creation, alrt_last_run) in res:
        try:
            if not qry_id:
                # orphaned alert (its query row was deleted): report to the
                # admin and keep processing the remaining alerts
                raise StandardError("""\
Warning: I have detected a bad alert for user id %d.
It seems one of his/her alert queries was deleted from the 'query' table.
Please check this and delete it if needed.
Otherwise no problem, I'm continuing with the other alerts now.
Here are all the alerts defined by this user: %s""" % (uid, repr(res)))
            alerts.append({
                'queryid' : qry_id,
                'queryargs' : qry_args,
                'textargs' : get_textual_query_info_from_urlargs(qry_args, ln=ln),
                'userid' : uid,
                'basketid' : bsk_id,
                'basketname' : bsk_name,
                'alertname' : alrt_name,
                'frequency' : alrt_frequency,
                'notification' : alrt_notification,
                'created' : convert_datetext_to_dategui(alrt_creation),
                'lastrun' : convert_datetext_to_dategui(alrt_last_run)
                })
        except StandardError:
            register_exception(alert_admin=True)
    # link to the "add new alert" form
    out = webalert_templates.tmpl_list_alerts(ln=ln, alerts=alerts,
                                              guest=isGuestUser(uid),
                                              guesttxt=warning_guest_user(type="alerts", ln=ln))
    return out
def perform_remove_alert(alert_name, id_query, id_basket, uid, ln=CFG_SITE_LANG):
    """perform_remove_alert: remove an alert from the database
    input: alert name
           identifier of the query;
           identifier of the basket
           uid
    output: confirmation message + the list of alerts Web page"""
    # load the right language
    _ = gettext_set_language(ln)
    # security check:
    if not check_user_can_add_alert(uid, id_query):
        raise AlertError(_("You do not have rights for this operation."))
    # set variables
    out = ""
    if (None in (alert_name, id_query, id_basket, uid)):
        return out
    # remove a row from the alerts table: user_query_basket
    query = """DELETE FROM user_query_basket
               WHERE id_user=%s AND id_query=%s AND id_basket=%s"""
    params = (uid, id_query, id_basket)
    res = run_sql(query, params)
    # NOTE(review): these confirmation messages are not passed through _()
    # unlike the rest of the module -- candidates for i18n.
    if res:
        out += "The alert <b>%s</b> has been removed from your profile.<br /><br />\n" % cgi.escape(alert_name)
    else:
        out += "Unable to remove alert <b>%s</b>.<br /><br />\n" % cgi.escape(alert_name)
    out += perform_list_alerts(uid, ln=ln)
    return out
def perform_update_alert(alert_name, frequency, notification, id_basket, id_query, old_id_basket, uid, ln = CFG_SITE_LANG):
    """update alert settings into the database
    input: the name of the new alert;
           alert frequency: 'month', 'week' or 'day';
           setting for e-mail notification: 'y' for yes, 'n' for no;
           new basket identifier: 'no' for no basket;
           new basket name for this alert;
           identifier of the query to be alerted
           old identifier of the basket associated to the alert
    output: confirmation message + the list of alerts Web page
    Raises AlertError on missing rights, empty name, duplicate name
    or undetectable old alert."""
    out = ''
    # sanity check
    if (None in (alert_name, frequency, notification, id_basket, id_query, old_id_basket, uid)):
        return out
    # load the right language
    _ = gettext_set_language(ln)
    # security check:
    if not check_user_can_add_alert(uid, id_query):
        raise AlertError(_("You do not have rights for this operation."))
    # check the alert name is not empty
    if alert_name.strip() == "":
        raise AlertError(_("The alert name cannot be empty."))
    # check if the alert can be created: fetch the currently stored name
    # for the (user, old basket, query) triple
    sql = """select alert_name
             from user_query_basket
             where id_user=%s
             and id_basket=%s
             and id_query=%s"""
    try:
        old_alert_name = run_sql(sql, (uid, old_id_basket, id_query))[0][0]
    except IndexError:
        # FIXME: I18N since this technique of the below raise message,
        # since this technique (detecting old alert IDs) is not nice
        # and should be replaced some day soon.
        raise AlertError("Unable to detect old alert name.")
    if old_alert_name.strip()!="" and old_alert_name != alert_name:
        check_alert_name( alert_name, uid, ln)
    if id_basket != old_id_basket:
        check_alert_is_unique( id_basket, id_query, uid, ln)
    # update a row into the alerts table: user_query_basket
    query = """UPDATE user_query_basket
               SET alert_name=%s,frequency=%s,notification=%s,
                   date_creation=%s,date_lastrun='',id_basket=%s
               WHERE id_user=%s AND id_query=%s AND id_basket=%s"""
    params = (alert_name, frequency, notification,
              convert_datestruct_to_datetext(time.localtime()),
              id_basket, uid, id_query, old_id_basket)
    run_sql(query, params)
    out += _("The alert %s has been successfully updated.") % ("<b>" + cgi.escape(alert_name) + "</b>",)
    out += "<br /><br />\n" + perform_list_alerts(uid, ln=ln)
    return out
def is_selected(var, fld):
    """Return " selected" when the two values are equal, "" otherwise.
    Useful for HTML select boxes."""
    if var != fld:
        return ""
    return " selected"
def account_list_alerts(uid, ln=CFG_SITE_LANG):
    """account_list_alerts: list alert for the account page
    input: the user id
           language
    output: the list of alerts Web page"""
    query = """ SELECT q.id, q.urlargs, a.id_user, a.id_query,
                       a.id_basket, a.alert_name, a.frequency,
                       a.notification,
                       DATE_FORMAT(a.date_creation,'%%d %%b %%Y'),
                       DATE_FORMAT(a.date_lastrun,'%%d %%b %%Y'),
                       a.id_basket
                FROM query q, user_query_basket a
                WHERE a.id_user=%s AND a.id_query=q.id
                ORDER BY a.alert_name ASC """
    res = run_sql(query, (uid,))
    alerts = []
    if len(res):
        for row in res:
            # only the query id and the alert name are shown on the account page
            alerts.append({
                'id' : row[0],
                'name' : row[5]
                })
    return webalert_templates.tmpl_account_list_alerts(ln=ln, alerts=alerts)
def account_list_searches(uid, ln=CFG_SITE_LANG):
    """ account_list_searches: list the searches of the user
    input: the user id
    output: resume of the searches"""
    out = ""
    # first detect number of queries:
    nb_queries_total = 0
    res = run_sql("SELECT COUNT(*) FROM user_query WHERE id_user=%s", (uid,), 1)
    try:
        nb_queries_total = res[0][0]
    except Exception:
        # keep the zero default when the count cannot be read
        # (was a bare `except:', which also swallowed SystemExit)
        pass
    # load the right language
    _ = gettext_set_language(ln)
    out += _("You have made %(x_nb)s queries. A %(x_url_open)sdetailed list%(x_url_close)s is available with a possibility to (a) view search results and (b) subscribe to an automatic email alerting service for these queries.") % {'x_nb': nb_queries_total, 'x_url_open': '<a href="../youralerts/display?ln=%s">' % ln, 'x_url_close': '</a>'}
    return out
diff --git a/invenio/legacy/webalert/webinterface.py b/invenio/legacy/webalert/webinterface.py
index 0afba7982..3f71a7e42 100644
--- a/invenio/legacy/webalert/webinterface.py
+++ b/invenio/legacy/webalert/webinterface.py
@@ -1,556 +1,556 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""PERSONAL FEATURES - YOUR ALERTS"""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
from invenio.config import CFG_SITE_SECURE_URL, CFG_SITE_NAME, \
CFG_ACCESS_CONTROL_LEVEL_SITE, CFG_SITE_NAME_INTL
from invenio.webpage import page
from invenio import webalert
from invenio.webuser import getUid, page_not_authorized, isGuestUser
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.url import redirect_to_url, make_canonical_urlargd
from invenio.webstat import register_customevent
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webuser import collect_user_info
from invenio.base.i18n import gettext_set_language
import invenio.template
webalert_templates = invenio.template.load('webalert')
class WebInterfaceYourAlertsPages(WebInterfaceDirectory):
"""Defines the set of /youralerts pages."""
_exports = ['', 'display', 'input', 'modify', 'list', 'add',
'update', 'remove']
def index(self, req, dummy):
    """Index page: redirect /youralerts/ to the list of alerts."""
    redirect_to_url(req, '%s/youralerts/list' % CFG_SITE_SECURE_URL)
def display(self, req, form):
"""Display search history page. A misnomer."""
argd = wash_urlargd(form, {'p': (str, "n")
})
uid = getUid(req)
# load the right language
_ = gettext_set_language(argd['ln'])
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/display" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/display%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
if argd['p'] == 'y':
_title = _("Popular Searches")
else:
_title = _("Your Searches")
# register event in webstat
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["display", "", user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
return page(title=_title,
body=webalert.perform_display(argd['p'], uid, ln=argd['ln']),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Display searches") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts',
secure_page_p=1)
def input(self, req, form):
argd = wash_urlargd(form, {'idq': (int, None),
'name': (str, ""),
'freq': (str, "week"),
'notif': (str, "y"),
'idb': (int, 0),
'error_msg': (str, ""),
})
uid = getUid(req)
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/input" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/input%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
# load the right language
_ = gettext_set_language(argd['ln'])
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
try:
html = webalert.perform_input_alert("add", argd['idq'], argd['name'], argd['freq'],
argd['notif'], argd['idb'], uid, ln=argd['ln'])
except webalert.AlertError, msg:
return page(title=_("Error"),
body=webalert_templates.tmpl_errorMsg(ln=argd['ln'], error_msg=msg),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Set a new alert") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
if argd['error_msg'] != "":
html = webalert_templates.tmpl_errorMsg(
ln = argd['ln'],
error_msg = argd['error_msg'],
rest = html,
)
# register event in webstat
alert_str = "%s (%d)" % (argd['name'], argd['idq'])
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["input", alert_str, user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
return page(title=_("Set a new alert"),
body=html,
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Set a new alert") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
def modify(self, req, form):
argd = wash_urlargd(form, {'idq': (int, None),
'old_idb': (int, None),
'name': (str, ""),
'freq': (str, "week"),
'notif': (str, "y"),
'idb': (int, 0),
'error_msg': (str, ""),
})
uid = getUid(req)
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/modify" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/modify%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
# load the right language
_ = gettext_set_language(argd['ln'])
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
try:
html = webalert.perform_input_alert("update", argd['idq'], argd['name'], argd['freq'],
argd['notif'], argd['idb'], uid, argd['old_idb'], ln=argd['ln'])
except webalert.AlertError, msg:
return page(title=_("Error"),
body=webalert_templates.tmpl_errorMsg(ln=argd['ln'], error_msg=msg),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Set a new alert") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
if argd['error_msg'] != "":
html = webalert_templates.tmpl_errorMsg(
ln = argd['ln'],
error_msg = argd['error_msg'],
rest = html,
)
# register event in webstat
alert_str = "%s (%d)" % (argd['name'], argd['idq'])
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["modify", alert_str, user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
return page(title=_("Modify alert settings"),
body=html,
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Modify alert settings") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
def list(self, req, form):
argd = wash_urlargd(form, {})
uid = getUid(req)
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/list" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/list%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
# load the right language
_ = gettext_set_language(argd['ln'])
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
# register event in webstat
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["list", "", user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
return page(title=_("Your Alerts"),
body=webalert.perform_list_alerts(uid, ln = argd['ln']),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Display alerts") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
def add(self, req, form):
argd = wash_urlargd(form, {'idq': (int, None),
'name': (str, None),
'freq': (str, None),
'notif': (str, None),
'idb': (int, None),
})
uid = getUid(req)
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/add" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/add%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
# load the right language
_ = gettext_set_language(argd['ln'])
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
try:
html = webalert.perform_add_alert(argd['name'], argd['freq'], argd['notif'],
argd['idb'], argd['idq'], uid, ln=argd['ln'])
except webalert.AlertError, msg:
return page(title=_("Error"),
body=webalert_templates.tmpl_errorMsg(ln=argd['ln'], error_msg=msg),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Set a new alert") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
# register event in webstat
alert_str = "%s (%d)" % (argd['name'], argd['idq'])
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["add", alert_str, user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
return page(title=_("Display alerts"),
body=html,
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Display alerts") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
def update(self, req, form):
argd = wash_urlargd(form, {'name': (str, None),
'freq': (str, None),
'notif': (str, None),
'idb': (int, None),
'idq': (int, None),
'old_idb': (int, None),
})
uid = getUid(req)
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/update" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/update%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
# load the right language
_ = gettext_set_language(argd['ln'])
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
try:
html = webalert.perform_update_alert(argd['name'], argd['freq'], argd['notif'],
argd['idb'], argd['idq'], argd['old_idb'], uid, ln=argd['ln'])
except webalert.AlertError, msg:
return page(title=_("Error"),
body=webalert_templates.tmpl_errorMsg(ln=argd['ln'], error_msg=msg),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Set a new alert") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
# register event in webstat
alert_str = "%s (%d)" % (argd['name'], argd['idq'])
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["update", alert_str, user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
return page(title=_("Display alerts"),
body=html,
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Display alerts") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
def remove(self, req, form):
argd = wash_urlargd(form, {'name': (str, None),
'idq': (int, None),
'idb': (int, None),
})
uid = getUid(req)
if CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
return page_not_authorized(req, "%s/youralerts/remove" % \
(CFG_SITE_SECURE_URL,),
navmenuid="youralerts")
elif uid == -1 or isGuestUser(uid):
return redirect_to_url(req, "%s/youraccount/login%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd({
'referer' : "%s/youralerts/remove%s" % (
CFG_SITE_SECURE_URL,
make_canonical_urlargd(argd, {})),
"ln" : argd['ln']}, {})))
# load the right language
_ = gettext_set_language(argd['ln'])
user_info = collect_user_info(req)
if not user_info['precached_usealerts']:
return page_not_authorized(req, "../", \
text = _("You are not authorized to use alerts."))
try:
html = webalert.perform_remove_alert(argd['name'], argd['idq'],
argd['idb'], uid, ln=argd['ln'])
except webalert.AlertError, msg:
return page(title=_("Error"),
body=webalert_templates.tmpl_errorMsg(ln=argd['ln'], error_msg=msg),
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Set a new alert") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
# register event in webstat
alert_str = "%s (%d)" % (argd['name'], argd['idq'])
if user_info['email']:
user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
else:
user_str = ""
try:
register_customevent("alerts", ["remove", alert_str, user_str])
except:
register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
# display success
return page(title=_("Display alerts"),
body=html,
navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
'sitesecureurl' : CFG_SITE_SECURE_URL,
'ln': argd['ln'],
'account' : _("Your Account"),
},
description=_("%s Personalize, Display alerts") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
uid=uid,
language=argd['ln'],
req=req,
lastupdated=__lastupdated__,
navmenuid='youralerts')
diff --git a/invenio/legacy/webbasket/api.py b/invenio/legacy/webbasket/api.py
index d37df2b13..2d3ea5a79 100644
--- a/invenio/legacy/webbasket/api.py
+++ b/invenio/legacy/webbasket/api.py
@@ -1,2945 +1,2945 @@
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Web Baskets features."""
__revision__ = "$Id$"
import sys
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
from invenio.intbitset import intbitset
import cgi
import urllib
from httplib import urlsplit, HTTPConnection
#from socket import getdefaulttimeout, setdefaulttimeout
from zlib import decompress
import re
from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, \
CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS
from invenio.base.i18n import gettext_set_language
from invenio.utils.date import convert_datetext_to_dategui, \
convert_datetext_to_datestruct,\
convert_datestruct_to_dategui
from invenio.modules.formatter import format_record
from invenio.webbasket_config import CFG_WEBBASKET_SHARE_LEVELS, \
CFG_WEBBASKET_SHARE_LEVELS_ORDERED, \
CFG_WEBBASKET_CATEGORIES, \
InvenioWebBasketWarning
from invenio.utils.url import get_referer
from invenio.webuser import isGuestUser, collect_user_info
from invenio.search_engine import \
record_exists, \
get_merged_recid, \
check_user_can_view_record, \
print_records_prologue, \
print_records_epilogue
#from invenio.webcomment import check_user_can_attach_file_to_comments
import invenio.webbasket_dblayer as db
try:
import invenio.template
webbasket_templates = invenio.template.load('webbasket')
except ImportError:
pass
from invenio.websearch_external_collections_utils import get_collection_name_by_id
from invenio.websearch_external_collections import select_hosted_search_engines
from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_TIMEOUT
from invenio.websearch_external_collections_getter import HTTPAsyncPageGetter, async_download
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.search_engine import search_unit
from invenio.htmlutils import remove_html_markup, unescape
########################################
### Display public baskets and notes ###
########################################
def perform_request_display_public(uid,
selected_bskid=0,
selected_recid=0,
optional_params={},
of='hb',
ln=CFG_SITE_LANG):
"""Engine for the display of a public interface. Calls the template and returns HTML.
@param selected_bskid: The id of the basket to be displayed (optional)
@param selected_recid: The id of the item to be displayed (optional)
@param optional_params: optional parameters to be passed, used for notes
@param of: display format
@param ln: language"""
_ = gettext_set_language(ln)
warnings_item = []
warnings_basket = []
(of, of_warnings) = wash_of(of)
if of_warnings:
navtrail = create_webbasket_navtrail(uid, ln=ln)
body = webbasket_templates.tmpl_warnings(of_warnings, ln)
return (body, of_warnings, navtrail)
basket = db.get_public_basket_info(selected_bskid)
if not basket:
if of != 'hb':
return ("", None, None)
try:
raise InvenioWebBasketWarning(_('The selected public basket does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings_html = webbasket_templates.tmpl_warnings(exc.message, ln)
#warnings.append(exc.message)
#warnings = ['WRN_WEBBASKET_INVALID_OR_RESTRICTED_PUBLIC_BASKET']
(body, navtrail) = perform_request_list_public_baskets(uid)
try:
raise InvenioWebBasketWarning(_('Please select a valid public basket from the list of public baskets.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_SHOW_LIST_PUBLIC_BASKETS')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
body = warnings_html + body
return (body, None, navtrail)
else:
(bskid, basket_name, id_owner, last_update, dummy, nb_items, recids, share_rights) = basket[0]
if selected_recid:
valid_recids = tuple(map(int, recids.split(',')))
if selected_recid in valid_recids:
(content, warnings_item) = __display_public_basket_single_item(bskid,
basket_name,
selected_recid,
nb_items,
share_rights,
optional_params,
of,
ln)
else:
try:
raise InvenioWebBasketWarning(_('The selected item does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings_item.append(exc.message)
#warnings_item.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_ITEM')
try:
raise InvenioWebBasketWarning(_('Returning to the public basket view.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings_item.append(exc.message)
#warnings_item.append('WRN_WEBBASKET_RETURN_TO_PUBLIC_BASKET')
selected_recid = 0
if not selected_recid:
if uid == id_owner:
subscription_status = 0
else:
if db.is_user_subscribed_to_basket(uid,bskid):
subscription_status = 1
else:
subscription_status = -1
(content, warnings_basket) = __display_public_basket(bskid,
basket_name,
last_update,
nb_items,
share_rights,
id_owner,
subscription_status,
of,
ln)
if of == 'hb':
body = webbasket_templates.tmpl_display(content=content)
warnings = warnings_item + warnings_basket
warnings_html = webbasket_templates.tmpl_warnings(warnings, ln)
body = warnings_html + body
else:
body = content
if of == 'hb':
navtrail = create_webbasket_navtrail(uid,
bskid=selected_bskid,
public_basket=True,
ln=ln)
if of == 'hb':
return (body, warnings, navtrail)
else:
return (body, None, None)
def __display_public_basket(bskid,
basket_name,
last_update,
nb_items,
share_rights,
id_owner,
subscription_status,
of='hb',
ln=CFG_SITE_LANG):
"""Private function. Display a basket giving its category and topic or group.
@param share_rights: rights user has on basket
@param group_sharing_level: None if basket is not shared,
0 if public basket,
> 0 if shared to usergroups but not public.
@param category: selected category (see webbasket_config.py)
@param selected_topic: # of selected topic to display baskets
@param selected_group_id: id of group to display baskets
@param ln: language"""
_ = gettext_set_language(ln)
warnings = []
nb_total_notes = 0
last_note = _("N/A")
records = []
notes_dates = []
last_update = convert_datetext_to_dategui(last_update, ln)
items = db.get_basket_content(bskid, of)
external_recids = []
for (recid, collection_id, nb_notes, last_note, ext_val, int_val, score) in items:
notes_dates.append(convert_datetext_to_datestruct(last_note))
last_note = convert_datetext_to_dategui(last_note, ln)
colid = collection_id and collection_id or collection_id == 0 and -1 or 0
val = ""
nb_total_notes += nb_notes
if recid < 0:
if ext_val:
val = decompress(ext_val)
else:
external_recids.append(recid)
else:
if int_val:
val = decompress(int_val)
else:
val = format_record(recid, of, on_the_fly=True)
records.append((recid, colid, nb_notes, last_note, val, score))
if external_recids:
external_records = format_external_records(external_recids, of)
for external_record in external_records:
for record in records:
if record[0] == -external_record[0]:
idx = records.index(record)
tuple_to_list = list(records.pop(idx))
tuple_to_list[4] = external_record[1]
records.insert(idx, tuple(tuple_to_list))
break
if notes_dates:
last_note = convert_datestruct_to_dategui(max(notes_dates), ln)
body = webbasket_templates.tmpl_public_basket(bskid,
basket_name,
last_update,
nb_items,
(check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'],),),
nb_total_notes,
records,
id_owner,
subscription_status,
of,
ln)
return (body, warnings)
def __display_public_basket_single_item(bskid,
basket_name,
recid,
nb_items,
share_rights,
optional_params={},
of='hb',
ln=CFG_SITE_LANG):
"""Private function. Display a basket giving its category and topic or group.
@param share_rights: rights user has on basket
@param group_sharing_level: None if basket is not shared,
0 if public basket,
> 0 if shared to usergroups but not public.
@param category: selected category (see webbasket_config.py)
@param selected_topic: # of selected topic to display baskets
@param selected_group_id: id of group to display baskets
@param ln: language"""
_ = gettext_set_language(ln)
warnings = []
item = db.get_basket_item(bskid, recid, of)
if item:
(recid, collection_id, nb_notes, last_note, ext_val, int_val, score) = item[0]
previous_item_recid = item[1]
next_item_recid = item[2]
item_index = item[3]
else:
# The validity of the recid and hence the item is already checked by the
# previous function and the appropriate warning is returned.
# This is just an extra check just in case we missed something.
# An empty body is returned.
body = ""
try:
raise InvenioWebBasketWarning(_('The selected item does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_ITEM')
return (body, warnings)
last_note = convert_datetext_to_dategui(last_note, ln)
colid = collection_id and collection_id or collection_id == 0 and -1 or 0
val = ""
if recid < 0:
if ext_val:
val = decompress(ext_val)
else:
external_record = format_external_records([recid], of)
val = external_record and external_record[0][1] or ""
else:
if int_val:
val = decompress(int_val)
else:
val = format_record(recid, of, on_the_fly=True)
item = (recid, colid, nb_notes, last_note, val, score)
notes = db.get_notes(bskid, recid)
body = webbasket_templates.tmpl_public_basket_single_item(bskid,
basket_name,
nb_items,
(check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']),
check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])),
item,
notes,
previous_item_recid,
next_item_recid,
item_index,
optional_params,
of,
ln)
return (body, warnings)
def perform_request_list_public_baskets(uid,
limit=1,
sort='name',
asc=1,
nb_views_show_p=False,
ln=CFG_SITE_LANG):
"""Display list of public baskets.
@param limit: display baskets from the incrementally numbered 'limit' and on
@param sort: sort by 'name' or 'views' or 'owner' or 'date' or 'items'
@param asc: ascending sort or not
@param ln: language"""
warnings_html = ""
number_of_all_public_baskets = db.count_all_public_baskets()
limit -= 1
if limit < 0:
limit = 0
elif limit >= number_of_all_public_baskets:
limit = number_of_all_public_baskets - 1
if not nb_views_show_p and sort == 'views':
# TODO: Add a 'sort by views' restriction warning
#warnings.append('...')
#warnings_html += webbasket_templates.tmpl_warnings(warnings, ln)
sort = "name"
all_public_baskets = db.get_list_public_baskets(limit,
CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS,
sort,
asc)
body = webbasket_templates.tmpl_display_list_public_baskets(all_public_baskets,
limit,
number_of_all_public_baskets,
sort,
asc,
nb_views_show_p,
ln)
search_box = __create_search_box(uid=uid,
category=CFG_WEBBASKET_CATEGORIES['ALLPUBLIC'],
ln=ln)
body = webbasket_templates.tmpl_display(content=body, search_box=search_box)
body = warnings_html + body
navtrail = create_webbasket_navtrail(uid,
public_basket=True,
ln=ln)
return (body, navtrail)
def perform_request_write_public_note(uid,
bskid=0,
recid=0,
cmtid=0,
ln=CFG_SITE_LANG):
"""Display a note writing form
@param uid: user id
@param bskid: basket id
@param recid: record id (comments are on a specific record in a specific basket)
@param cmtid: if provided this comment is a reply to comment cmtid.
@param category: selected category
@param topic: selected topic
@param group_id: selected group id
@param ln: language
"""
_ = gettext_set_language(ln)
optional_params = {}
warnings_rights = []
warnings_html = ""
if not can_add_notes_to_public_basket_p(bskid):
try:
raise InvenioWebBasketWarning(_('You do not have permission to write notes to this item.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
# warnings_rights = exc.message
#warnings_rights = ['WRN_WEBBASKET_RESTRICTED_WRITE_NOTES']
warnings_html += webbasket_templates.tmpl_warnings(warnings_rights, ln)
else:
if cmtid and db.note_belongs_to_item_in_basket_p(cmtid, recid, bskid):
optional_params["Add note"] = db.get_note(cmtid)
optional_params["Reply to"] = cmtid
elif cmtid:
optional_params["Add note"] = ()
try:
raise InvenioWebBasketWarning(_('The note you are quoting does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
optional_params["Warnings"] = exc.message
else:
optional_params["Add note"] = ()
(body, warnings, navtrail) = perform_request_display_public(uid=uid,
selected_bskid=bskid,
selected_recid=recid,
optional_params=optional_params,
of='hb',
ln=CFG_SITE_LANG)
if not warnings:
body = warnings_html + body
# warnings = warnings_rights
return (body, navtrail)
def perform_request_save_public_note(uid,
bskid=0,
recid=0,
note_title="",
note_body="",
date_creation="",
editor_type='textarea',
ln=CFG_SITE_LANG,
reply_to=None):
""" Save a given comment if able to.
@param uid: user id (int)
@param bskid: basket id (int)
@param recid: record id (int)
@param title: title of comment (string)
@param text: comment's body (string)
@param ln: language (string)
@param editor_type: the kind of editor/input used for the comment: 'textarea', 'ckeditor'
@param reply_to: the id of the comment we are replying to
"""
optional_params = {}
warnings_rights = []
warnings_html = ""
_ = gettext_set_language(ln)
if not can_add_notes_to_public_basket_p(bskid):
try:
raise InvenioWebBasketWarning(_('You do not have permission to write notes to this item.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
# warnings_rights = exc.message
#warnings_rights = ['WRN_WEBBASKET_RESTRICTED_WRITE_NOTES']
warnings_html += webbasket_templates.tmpl_warnings(warnings_rights, ln)
else:
if not note_title or not note_body: # FIXME: improve check when ckeditor
optional_params["Incomplete note"] = (note_title, note_body)
try:
raise InvenioWebBasketWarning(_('You must fill in both the subject and the body of the note.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
optional_params["Warnings"] = exc.message
else:
if editor_type == 'ckeditor':
# Here we remove the line feeds introduced by CKEditor (they
# have no meaning for the user) and replace the HTML line
# breaks by linefeeds, so that we are close to an input that
# would be done without the CKEditor. That's much better if a
# reply to a comment is made with a browser that does not
# support CKEditor.
note_body = note_body.replace('\n', '').replace('\r', '').replace('<br />', '\n')
if not(db.save_note(uid, bskid, recid, note_title, note_body, date_creation, reply_to)):
# TODO: The note could not be saved. DB problem?
pass
else:
# TODO: inform about successful annotating.
pass
(body, warnings, navtrail) = perform_request_display_public(uid=uid,
selected_bskid=bskid,
selected_recid=recid,
optional_params=optional_params,
of='hb',
ln=CFG_SITE_LANG)
if not warnings:
body = warnings_html + body
# warnings = warnings_rights
return (body, navtrail)
#################################
### Display baskets and notes ###
#################################
def perform_request_display(uid,
selected_category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
selected_topic="",
selected_group_id=0,
selected_bskid=0,
selected_recid=0,
optional_params={},
of='hb',
ln=CFG_SITE_LANG):
"""Display all the baskets of given category, topic or group.
@param uid: user id
@param selected_category: selected category (see webbasket_config.py)
@param selected_topic: # of selected topic to display baskets
@param selected_group_id: id of group to display baskets
@param ln: language"""
_ = gettext_set_language(ln)
warnings = []
warnings_html = ""
valid_category_choice = False
selected_basket_info = []
content = ""
search_box = ""
(of, of_warnings) = wash_of(of)
if of_warnings:
navtrail = create_webbasket_navtrail(uid, ln=ln)
body = webbasket_templates.tmpl_warnings(of_warnings, ln)
return (body, of_warnings, navtrail)
(selected_category, category_warnings) = wash_category(selected_category)
if not selected_category and category_warnings:
if of == 'xm':
return ("", None. None)
navtrail = create_webbasket_navtrail(uid, ln=ln)
body = webbasket_templates.tmpl_warnings(category_warnings, ln)
return (body, category_warnings, navtrail)
if selected_category == CFG_WEBBASKET_CATEGORIES['ALLPUBLIC']:
if of == 'xm':
return ("", None. None)
# TODO: Send the correct title of the page as well.
return perform_request_list_public_baskets(uid)
personal_info = db.get_all_user_personal_basket_ids_by_topic(uid)
personal_baskets_info = ()
if personal_info and selected_category == CFG_WEBBASKET_CATEGORIES['PRIVATE']:
# Create a dictionary that has the valid topics for keys and the basket
# ids in each topic (string, ids separated by commas) as values.
personal_info_dict = {}
for personal_info_topic_and_bskids in personal_info:
personal_info_dict[personal_info_topic_and_bskids[0]] = map(int, personal_info_topic_and_bskids[1].split(','))
valid_category_choice = True
if selected_topic:
valid_selected_topic_p = False
# Validate the topic. Check if the selected topic is one of the keys
# in the dictionary. If it is valid then get some more info for that
# topic from the DB.
if selected_topic in personal_info_dict.keys():
personal_baskets_info = db.get_personal_baskets_info_for_topic(uid, selected_topic)
valid_selected_topic_p = True
if not valid_selected_topic_p:
try:
raise InvenioWebBasketWarning(_('The selected topic does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_TOPIC')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
valid_selected_topic_p = False
selected_topic = ""
else:
valid_selected_topic_p = True
if valid_selected_topic_p and selected_bskid:
if selected_topic:
valid_bskids = personal_info_dict[selected_topic]
else:
valid_bskids = []
for valid_bskids_per_topic in personal_info_dict.values():
valid_bskids.extend(valid_bskids_per_topic)
if selected_bskid in valid_bskids:
if not selected_topic:
for valid_topic in personal_info_dict.iterkeys():
if selected_bskid in personal_info_dict[valid_topic]:
selected_topic = valid_topic
break
personal_baskets_info = db.get_personal_baskets_info_for_topic(uid, selected_topic)
for personal_basket_info in personal_baskets_info:
if personal_basket_info[0] == selected_bskid:
selected_basket_info = list(personal_basket_info)
selected_basket_info.append(CFG_WEBBASKET_SHARE_LEVELS['MANAGE'])
break
else:
try:
raise InvenioWebBasketWarning(_('The selected basket does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_BASKET')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
selected_bskid = 0
else:
selected_bskid = 0
group_info = db.get_all_user_group_basket_ids_by_group(uid)
group_baskets_info = ()
selected_group_name = ""
if group_info and selected_category == CFG_WEBBASKET_CATEGORIES['GROUP']:
# Create a dictionary that has the valid group as keys and the basket
# ids in each group (string, ids separated by commas) as values.
group_info_dict = {}
for group_info_group_and_bskids in group_info:
group_info_dict[group_info_group_and_bskids[0]] = (group_info_group_and_bskids[1], \
map(int, group_info_group_and_bskids[2].split(',')))
valid_category_choice = True
if selected_group_id:
valid_selected_group_p = False
# Validate the group. Check if the selected group is one of the keys
# in the dictionary. If it is valid then get some more info for that
# group from the DB.
if selected_group_id in group_info_dict.keys():
selected_group_name = group_info_dict[selected_group_id][0]
group_baskets_info = db.get_group_baskets_info_for_group(selected_group_id)
valid_selected_group_p = True
if not valid_selected_group_p:
try:
raise InvenioWebBasketWarning(_('The selected topic does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_GROUP')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
selected_group_id = ""
valid_selected_group_p = False
else:
valid_selected_group_p = True
if valid_selected_group_p and selected_bskid:
if selected_group_id:
valid_bskids = group_info_dict[selected_group_id][1]
else:
valid_bskids = []
for group_and_valid_bskids_per_group in group_info_dict.values():
valid_bskids_per_group = group_and_valid_bskids_per_group[1]
valid_bskids.extend(valid_bskids_per_group)
if selected_bskid in valid_bskids:
if not selected_group_id:
for valid_group_id in group_info_dict.iterkeys():
if selected_bskid in group_info_dict[valid_group_id][1]:
selected_group_id = valid_group_id
break
selected_group_name = group_info_dict[selected_group_id][0]
group_baskets_info = db.get_group_baskets_info_for_group(selected_group_id)
for group_basket_info in group_baskets_info:
if group_basket_info[0] == selected_bskid:
selected_basket_info = list(group_basket_info)
# INFO: uncomment the two following lines to give MANAGE
# rights to the owner of the basket even when through
# the group view of the basket.
#if group_basket_info[7] == uid:
# selected_basket_info[6] = CFG_WEBBASKET_SHARE_LEVELS['MANAGE']
selected_basket_info.pop(7)
break
else:
try:
raise InvenioWebBasketWarning(_('The selected topic does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_BASKET')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
selected_bskid = 0
else:
selected_bskid = 0
public_info = db.get_all_external_basket_ids_and_names(uid)
if public_info and selected_category == CFG_WEBBASKET_CATEGORIES['EXTERNAL']:
valid_category_choice = True
if selected_bskid:
valid_bskids = [(valid_basket[0], valid_basket[3]) for valid_basket in public_info]
if (selected_bskid, 0) in valid_bskids:
public_basket_info = db.get_external_basket_info(selected_bskid)
if public_basket_info:
selected_basket_info = list(public_basket_info[0])
elif (selected_bskid, None) in valid_bskids:
try:
raise InvenioWebBasketWarning(_('The selected basket is no longer public.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_FORMER_PUBLIC_BASKET')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
selected_bskid = 0
else:
try:
raise InvenioWebBasketWarning(_('The selected basket does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
#warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_BASKET')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
selected_bskid = 0
if not valid_category_choice:
if personal_info:
selected_category = CFG_WEBBASKET_CATEGORIES['PRIVATE']
elif group_info:
selected_category = CFG_WEBBASKET_CATEGORIES['GROUP']
elif public_info:
selected_category = CFG_WEBBASKET_CATEGORIES['EXTERNAL']
else:
selected_category = CFG_WEBBASKET_CATEGORIES['ALLPUBLIC']
if not of.startswith('x'):
directory_box = webbasket_templates.tmpl_create_directory_box(selected_category,
selected_topic,
(selected_group_id, selected_group_name),
selected_bskid,
(personal_info, personal_baskets_info),
(group_info, group_baskets_info),
public_info,
ln)
if selected_basket_info:
if selected_recid:
(bskid, basket_name, last_update, dummy, nb_items, dummy, share_rights) = selected_basket_info
(content, bsk_warnings) = __display_basket_single_item(uid, bskid,
basket_name,
selected_recid,
last_update,
nb_items,
share_rights,
selected_category,
selected_topic,
selected_group_id,
optional_params,
of,
ln)
else:
(bskid, basket_name, last_update, dummy, nb_items, dummy, share_rights) = selected_basket_info
share_level = db.get_basket_share_level(bskid)
if share_level:
share_level = share_level[0][0]
else:
share_level = None
if share_level == 0:
nb_subscribers = db.count_public_basket_subscribers(bskid)
else:
nb_subscribers = None
(content, bsk_warnings) = __display_basket(uid, bskid,
basket_name,
last_update,
nb_items,
nb_subscribers,
share_rights,
share_level,
selected_category,
selected_topic,
selected_group_id,
of,
ln)
warnings.extend(bsk_warnings)
if not of.startswith('x'):
warnings_html += webbasket_templates.tmpl_warnings(bsk_warnings, ln)
else:
if not of.startswith('x'):
search_box = __create_search_box(uid=uid,
category=selected_category,
topic=selected_topic,
grpid=selected_group_id,
p="",
n=1,
ln=ln)
if not of.startswith('x'):
body = webbasket_templates.tmpl_display(directory_box, content, search_box)
body = warnings_html + body
else:
body = content
if not of.startswith('x'):
navtrail = create_webbasket_navtrail(uid,
category=selected_category,
topic=selected_topic,
group=selected_group_id,
bskid=selected_bskid,
ln=ln)
if not of.startswith('x'):
return (body, warnings, navtrail)
else:
return (body, None, None)
def __display_basket(uid, bskid,
                     basket_name,
                     last_update,
                     nb_items,
                     nb_subscribers,
                     share_rights,
                     share_level,
                     selected_category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                     selected_topic="",
                     selected_group_id=0,
                     of="hb",
                     ln=CFG_SITE_LANG):
    """Private function. Display a basket giving its category and topic or group.

    Renders every item of the basket, migrating on the fly any item whose
    record was deleted and merged into another record (DB side effect).

    @param uid: user id
    @param bskid: basket id
    @param basket_name: name of the basket to display
    @param last_update: last modification date (DB text format)
    @param nb_items: number of items in the basket
    @param nb_subscribers: number of subscribers (public baskets only)
    @param share_rights: rights user has on basket
    @param share_level: None if basket is not shared,
                        0 if public basket,
                        > 0 if shared to usergroups but not public.
    @param selected_category: selected category (see webbasket_config.py)
    @param selected_topic: # of selected topic to display baskets
    @param selected_group_id: id of group to display baskets
    @param of: output format code
    @param ln: language
    @return: (body, warnings) tuple
    """
    _ = gettext_set_language(ln)
    warnings = []
    nb_total_notes = 0
    last_note = _("N/A")
    records = []
    notes_dates = []
    #date_modification = convert_datetext_to_dategui(date_modification, ln)
    last_update = convert_datetext_to_dategui(last_update, ln)
    items = db.get_basket_content(bskid, of)
    # External records whose formatted value is not cached yet; they are
    # formatted in one batch after the loop.
    external_recids = []
    for (recid, collection_id, nb_notes, last_note, ext_val, int_val, score) in items:
        notes_dates.append(convert_datetext_to_datestruct(last_note))
        last_note = convert_datetext_to_dategui(last_note, ln)
        # Map the DB collection id onto the item kind used below:
        # positive -> external record, -1 -> external URL, 0 -> local record.
        colid = collection_id and collection_id or collection_id == 0 and -1 or 0
        val = ""
        nb_total_notes += nb_notes
        # check if the current recid has been deleted and has been merged,
        # in that case obtain the recid of the new record and redirect to it
        merged_recid = get_merged_recid(recid)
        record_status = record_exists(recid)
        if record_status == -1 and merged_recid: # the record has been deleted and has been merged
            # keep notes about the deleted record to store them in the merged record
            deleted_record_notes = db.get_notes(bskid, recid)
            # remove the deleted record recid from the basket bskid
            db.delete_item(bskid, recid)
            recid = merged_recid
            # add the merged record recid in the basket bskid
            db.add_to_basket(uid, [recid], 0, bskid)
            # save the notes in the merged record recid
            for note in deleted_record_notes:
                note_title = note[2]
                note_body = note[3]
                date_creation= note[4]
                reply_to = note[-1]
                db.save_note(uid, bskid, recid, note_title, note_body, date_creation, reply_to)
            # force re-formatting of the merged record below
            int_val = ""
        if recid < 0:
            # negative recid: external item; use the cached formatted value
            # if available, otherwise batch-format it after the loop
            if ext_val:
                val = decompress(ext_val)
            else:
                external_recids.append(recid)
        else:
            if int_val:
                val = decompress(int_val)
            else:
                val = format_record(recid, of, on_the_fly=True)
        ## external item (record): colid = positive integer
        ## external item (url): colid = -1
        ## local item (record): colid = 0
        records.append((recid, colid, nb_notes, last_note, val, score))
    if external_recids:
        external_records = format_external_records(external_recids, of)
        # Splice each freshly formatted external record back into its slot
        # (external record ids are stored negated in the basket).
        for external_record in external_records:
            for record in records:
                if record[0] == -external_record[0]:
                    idx = records.index(record)
                    tuple_to_list = list(records.pop(idx))
                    tuple_to_list[4] = external_record[1]
                    records.insert(idx, tuple(tuple_to_list))
                    break
    if notes_dates:
        # Most recent note date across all items, in GUI format.
        last_note = convert_datestruct_to_dategui(max(notes_dates), ln)
    if of == 'hb' or of.startswith('x'):
        body = webbasket_templates.tmpl_basket(bskid,
                                               basket_name,
                                               last_update,
                                               nb_items,
                                               nb_subscribers,
                                               (check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READITM']),
                                                check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['MANAGE']),
                                                check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']),
                                                check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']),
                                                check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDITM']),
                                                check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['DELITM'])),
                                               nb_total_notes,
                                               share_level,
                                               selected_category,
                                               selected_topic,
                                               selected_group_id,
                                               records,
                                               of,
                                               ln)
    else:
        # Other formats: simply concatenate the formatted records.
        body = ""
        for rec in records:
            body += rec[4]
    return (body, warnings)
def __display_basket_single_item(uid, bskid,
basket_name,
recid,
last_update,
nb_items,
share_rights,
selected_category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
selected_topic="",
selected_group_id=0,
optional_params={},
of='hb',
ln=CFG_SITE_LANG):
"""Private function. Display a basket giving its category and topic or group.
@param share_rights: rights user has on basket
@param selected_category: selected category (see webbasket_config.py)
@param selected_topic: # of selected topic to display baskets
@param selected_group_id: id of group to display baskets
@param ln: language"""
_ = gettext_set_language(ln)
warnings = []
last_note = _("N/A")
notes_dates = []
#date_modification = convert_datetext_to_dategui(date_modification, ln)
last_update = convert_datetext_to_dategui(last_update, ln)
item = db.get_basket_item(bskid, recid, of)
if item:
(recid, collection_id, nb_notes, last_note, ext_val, int_val, score) = item[0]
previous_item_recid = item[1]
next_item_recid = item[2]
item_index = item[3]
else:
share_level = db.get_basket_share_level(bskid)
if share_level:
share_level = share_level[0][0]
else:
share_level = None
if share_level == 0:
nb_subscribers = db.count_public_basket_subscribers(bskid)
else:
nb_subscribers = None
(content, bsk_warnings) = __display_basket(uid, bskid,
basket_name,
last_update,
nb_items,
nb_subscribers,
share_rights,
share_level,
selected_category,
selected_topic,
selected_group_id,
of,
ln)
try:
raise InvenioWebBasketWarning(_('The selected item does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
bsk_warnings.append(exc.message)
#bsk_warnings.append('WRN_WEBBASKET_INVALID_OR_RESTRICTED_ITEM')
return (content, bsk_warnings)
notes_dates.append(convert_datetext_to_datestruct(last_note))
last_note = convert_datetext_to_dategui(last_note, ln)
colid = collection_id and collection_id or collection_id == 0 and -1 or 0
val = ""
if recid < 0:
if ext_val:
val = decompress(ext_val)
else:
external_record = format_external_records([recid], of)
val = external_record and external_record[0][1] or ""
else:
if int_val:
val = decompress(int_val)
else:
val = format_record(recid, of, on_the_fly=True)
item = (recid, colid, nb_notes, last_note, val, score)
comments = db.get_notes(bskid, recid)
if notes_dates:
last_note = convert_datestruct_to_dategui(max(notes_dates), ln)
body = webbasket_templates.tmpl_basket_single_item(bskid,
basket_name,
nb_items,
(check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READITM']),
check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']),
check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']),
check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['DELCMT'])),
selected_category,
selected_topic,
selected_group_id,
item, comments,
previous_item_recid, next_item_recid, item_index,
optional_params,
of,
ln)
return (body, warnings)
def perform_request_search(uid,
selected_category="",
selected_topic="",
selected_group_id=0,
p="",
b="",
n=0,
#format='xm',
ln=CFG_SITE_LANG):
"""Search the baskets...
@param uid: user id
@param category: selected category (see webbasket_config.py)
@param selected_topic: # of selected topic to display baskets
@param selected_group_id: id of group to display baskets
@param ln: language"""
_ = gettext_set_language(ln)
body = ""
#warnings = []
warnings_html = ""
(b_category, b_topic_or_grpid, b_warnings) = wash_b_search(b)
# we extract the category from the washed b GET variable.
# if a valid category was returned we use it as the selected category.
if b_category:
selected_category = b_category
if selected_category == CFG_WEBBASKET_CATEGORIES['PRIVATE']:
selected_topic = b_topic_or_grpid
elif selected_category == CFG_WEBBASKET_CATEGORIES['GROUP']:
selected_group_id = b_topic_or_grpid
# if no category was returned and there were warnings it means there was a
# bad input, send the warning to the user and return the page.
elif b_warnings:
navtrail = create_webbasket_navtrail(uid, search_baskets=True, ln=ln)
body = webbasket_templates.tmpl_warnings(b_warnings, ln)
return (body, navtrail)
# if no category was returned and there were no warnings it means no category
# was defined in the b GET variable. If the user has not defined a category
# either using the category GET variable it means there is no category defined
# whatsoever.
elif not selected_category:
selected_category = ""
# finally, if no category was returned but the user has defined a category
# using the category GET variable we extract the category after washing the
# variable.
else:
(selected_category, category_warnings) = wash_category(selected_category)
if not selected_category and category_warnings:
navtrail = create_webbasket_navtrail(uid, search_baskets=True, ln=ln)
body = webbasket_templates.tmpl_warnings(category_warnings, ln)
return (body, navtrail)
if selected_category == CFG_WEBBASKET_CATEGORIES['PRIVATE'] and selected_topic:
(selected_topic, topic_warnings) = wash_topic(uid, selected_topic)
if not selected_topic and topic_warnings:
navtrail = create_webbasket_navtrail(uid, search_baskets=True, ln=ln)
body = webbasket_templates.tmpl_warnings(topic_warnings, ln)
return (body, navtrail)
if selected_category == CFG_WEBBASKET_CATEGORIES['GROUP'] and selected_group_id:
(selected_group_id, group_warnings) = wash_group(uid, selected_group_id)
if not selected_group_id and group_warnings:
navtrail = create_webbasket_navtrail(uid, search_baskets=True, ln=ln)
body = webbasket_templates.tmpl_warnings(group_warnings, ln)
return (body, navtrail)
# IDEA: in case we pass an "action=search" GET variable we can use the
# following bit to warn the user he's searching for an empty search pattern.
#if action == "search" and not p:
# warnings_html += webbasket_templates.tmpl_warnings('WRN_WEBBASKET_NO_SEARCH_PATTERN', ln)
# perform_search = 0
if p:
# Let's set some initial values
personal_search_results = None
total_no_personal_search_results = 0
group_search_results = None
total_no_group_search_results = 0
public_search_results = None
total_no_public_search_results = 0
all_public_search_results = None
total_no_all_public_search_results = 0
# Let's precalculate the local search results
# and the pattern for the external search results
local_search_results = search_unit(p)
# How strict should the pattern be? Look for the exact word
# (using word boundaries: \b) or is any substring enough?
# not that strict:
# since we remove the html markup before searching for the pattern we
# can use a rather simple pattern here.
# INFO: we choose a not so strict pattern, since there are issues with
# word boundaries and utf-8 strings (ex. with greek that was tested)
pattern = re.compile(r'%s' % (re.escape(p),), re.DOTALL + re.MULTILINE + re.IGNORECASE + re.UNICODE)
#pattern = re.compile(r'%s(?!([^<]+)?>)' % (p,), re.DOTALL + re.MULTILINE + re.IGNORECASE + re.UNICODE)
# strict:
# since we remove the html markup before searching for the pattern we
# can use a rather simple pattern here.
#pattern = re.compile(r'\b%s\b' % (re.escape(p),), re.DOTALL + re.MULTILINE + re.IGNORECASE + re.UNICODE)
#pattern = re.compile(r'%s\b(?!([^<]+)?>)' % (p,), re.DOTALL + re.MULTILINE + re.IGNORECASE + re.UNICODE)
# TODO: All the external records are now saved automatically first in xml.
# So, the search should be done on the "xm" formatted records in the database
# and not the "hb" ones. (That is not the case for their comments though).
# Records in xml in the database are stored escaped. It's then suggested
# that the pattern is also escaped before we performed the search for more
# consistent results. We could also use .replace("\n", "") to clean the
# content (after the removal of html markup) from all the newline characters.
# The search format for external records. This means in which format will
# the external records be fetched from the database to be searched then.
format = 'xm'
### Calculate the search results for the user's personal baskets ###
if b.startswith("P") or not b:
personal_search_results = {}
personal_items = db.get_all_items_in_user_personal_baskets(uid, selected_topic, format)
personal_local_items = personal_items[0]
personal_external_items = personal_items[1]
personal_external_items_xml_records = {}
for local_info_per_basket in personal_local_items:
bskid = local_info_per_basket[0]
basket_name = local_info_per_basket[1]
topic = local_info_per_basket[2]
recid_list = local_info_per_basket[3]
local_recids_per_basket = intbitset(map(int, recid_list.strip(',').split(',')))
intsec = local_search_results.intersection(local_recids_per_basket)
if intsec:
personal_search_results[bskid] = [basket_name, topic, len(intsec), list(intsec)]
total_no_personal_search_results += len(intsec)
for external_info_per_basket in personal_external_items:
bskid = external_info_per_basket[0]
basket_name = external_info_per_basket[1]
topic = external_info_per_basket[2]
recid = external_info_per_basket[3]
value = external_info_per_basket[4]
xml_record = decompress(value)
personal_external_items_xml_records[recid] = xml_record
text = remove_html_markup(xml_record, remove_escaped_chars_p=False)
text = unescape(text)
#text = text.replace('\n', '')
result = pattern.search(text)
if result:
if personal_search_results.has_key(bskid):
personal_search_results[bskid][2] += 1
personal_search_results[bskid][3].append(recid)
else:
personal_search_results[bskid] = [basket_name, topic, 1, [recid]]
total_no_personal_search_results += 1
if n:
personal_items_by_matching_notes = db.get_all_items_in_user_personal_baskets_by_matching_notes(uid, selected_topic, p)
for info_per_basket_by_matching_notes in personal_items_by_matching_notes:
bskid = info_per_basket_by_matching_notes[0]
basket_name = info_per_basket_by_matching_notes[1]
topic = info_per_basket_by_matching_notes[2]
recid_list = info_per_basket_by_matching_notes[3]
recids_per_basket_by_matching_notes = set(map(int, recid_list.strip(',').split(',')))
if personal_search_results.has_key(bskid):
no_personal_search_results_per_basket_so_far = personal_search_results[bskid][2]
personal_search_results[bskid][3] = list(set(personal_search_results[bskid][3]).union(recids_per_basket_by_matching_notes))
personal_search_results[bskid][2] = len(personal_search_results[bskid][3])
total_no_personal_search_results += ( personal_search_results[bskid][2] - no_personal_search_results_per_basket_so_far )
else:
personal_search_results[bskid] = [basket_name, topic, len(recids_per_basket_by_matching_notes), list(recids_per_basket_by_matching_notes)]
total_no_personal_search_results += len(recids_per_basket_by_matching_notes)
# For every found record: calculate the number of notes
# and the HTML representation of the record.
for bskid in personal_search_results.keys():
recids = personal_search_results[bskid][3]
number_of_notes_per_record = db.get_number_of_notes_per_record_in_basket(bskid, recids)
records = []
for recid_and_notes in number_of_notes_per_record:
recid = recid_and_notes[0]
number_of_notes = recid_and_notes[1]
if recid < 0:
xml_record = personal_external_items_xml_records[recid]
record_html = format_record(None, of='bsr', xml_record=xml_record)
records.append((recid, number_of_notes, record_html))
else:
record_html = format_record(recid, of='bsr', on_the_fly=True)
records.append((recid, number_of_notes, record_html))
personal_search_results[bskid][3] = records
### Calculate the search results for the user's group baskets ###
if b.startswith("G") or not b:
group_search_results = {}
group_items = db.get_all_items_in_user_group_baskets(uid, selected_group_id, format)
group_local_items = group_items[0]
group_external_items = group_items[1]
group_external_items_xml_records = {}
for local_info_per_basket in group_local_items:
bskid = local_info_per_basket[0]
basket_name = local_info_per_basket[1]
grpid = local_info_per_basket[2]
group_name = local_info_per_basket[3]
share_rights = local_info_per_basket[4]
recid_list = local_info_per_basket[5]
local_recids_per_basket = intbitset(map(int, recid_list.strip(',').split(',')))
intsec = local_search_results.intersection(local_recids_per_basket)
if intsec:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
group_search_results[bskid] = [basket_name, grpid, group_name, share_rights_notes, len(intsec), list(intsec)]
total_no_group_search_results += len(intsec)
for external_info_per_basket in group_external_items:
bskid = external_info_per_basket[0]
basket_name = external_info_per_basket[1]
grpid = external_info_per_basket[2]
group_name = external_info_per_basket[3]
share_rights = external_info_per_basket[4]
recid = external_info_per_basket[5]
value = external_info_per_basket[6]
xml_record = decompress(value)
group_external_items_xml_records[recid] = xml_record
text = remove_html_markup(xml_record, remove_escaped_chars_p=False)
text = unescape(text)
#text = text.replace('\n', '')
result = pattern.search(text)
if result:
if group_search_results.has_key(bskid):
group_search_results[bskid][4] += 1
group_search_results[bskid][5].append(recid)
else:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
group_search_results[bskid] = [basket_name, grpid, group_name, share_rights_notes, 1, [recid]]
total_no_group_search_results += 1
if n:
group_items_by_matching_notes = db.get_all_items_in_user_group_baskets_by_matching_notes(uid, selected_group_id, p)
for info_per_basket_by_matching_notes in group_items_by_matching_notes:
bskid = info_per_basket_by_matching_notes[0]
basket_name = info_per_basket_by_matching_notes[1]
grpid = info_per_basket_by_matching_notes[2]
group_name = info_per_basket_by_matching_notes[3]
share_rights = info_per_basket_by_matching_notes[4]
recid_list = info_per_basket_by_matching_notes[5]
recids_per_basket_by_matching_notes = set(map(int, recid_list.strip(',').split(',')))
if group_search_results.has_key(bskid):
no_group_search_results_per_basket_so_far = group_search_results[bskid][4]
group_search_results[bskid][5] = list(set(group_search_results[bskid][5]).union(recids_per_basket_by_matching_notes))
group_search_results[bskid][4] = len(group_search_results[bskid][5])
total_no_group_search_results += ( group_search_results[bskid][4] - no_group_search_results_per_basket_so_far )
else:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
group_search_results[bskid] = [basket_name, grpid, group_name, share_rights_notes, len(recids_per_basket_by_matching_notes), list(recids_per_basket_by_matching_notes)]
total_no_group_search_results += len(recids_per_basket_by_matching_notes)
# For every found record: calculate the number of notes
# and the HTML representation of the record.
for bskid in group_search_results.keys():
recids = group_search_results[bskid][5]
number_of_notes_per_record = db.get_number_of_notes_per_record_in_basket(bskid, recids)
records = []
for recid_and_notes in number_of_notes_per_record:
recid = recid_and_notes[0]
number_of_notes = recid_and_notes[1]
if recid < 0:
xml_record = group_external_items_xml_records[recid]
record_html = format_record(None, of='bsr', xml_record=xml_record)
records.append((recid, number_of_notes, record_html))
else:
record_html = format_record(recid, of='bsr', on_the_fly=True)
records.append((recid, number_of_notes, record_html))
group_search_results[bskid][5] = records
### Calculate the search results for the user's public baskets ###
if b.startswith("E") or not b:
public_search_results = {}
public_items = db.get_all_items_in_user_public_baskets(uid, format)
public_local_items = public_items[0]
public_external_items = public_items[1]
public_external_items_xml_records = {}
for local_info_per_basket in public_local_items:
bskid = local_info_per_basket[0]
basket_name = local_info_per_basket[1]
share_rights = local_info_per_basket[2]
recid_list = local_info_per_basket[3]
local_recids_per_basket = intbitset(map(int, recid_list.strip(',').split(',')))
intsec = local_search_results.intersection(local_recids_per_basket)
if intsec:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
public_search_results[bskid] = [basket_name, share_rights_notes, len(intsec), list(intsec)]
total_no_public_search_results += len(intsec)
for external_info_per_basket in public_external_items:
bskid = external_info_per_basket[0]
basket_name = external_info_per_basket[1]
share_rights = external_info_per_basket[2]
recid = external_info_per_basket[3]
value = external_info_per_basket[4]
xml_record = decompress(value)
public_external_items_xml_records[recid] = xml_record
text = remove_html_markup(xml_record, remove_escaped_chars_p=False)
text = unescape(text)
#text = text.replace('\n', '')
result = pattern.search(text)
if result:
if public_search_results.has_key(bskid):
public_search_results[bskid][2] += 1
public_search_results[bskid][3].append(recid)
else:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
public_search_results[bskid] = [basket_name, share_rights_notes, 1, [recid]]
total_no_public_search_results += 1
if n:
public_items_by_matching_notes = db.get_all_items_in_user_public_baskets_by_matching_notes(uid, p)
for info_per_basket_by_matching_notes in public_items_by_matching_notes:
bskid = info_per_basket_by_matching_notes[0]
basket_name = info_per_basket_by_matching_notes[1]
share_rights = info_per_basket_by_matching_notes[2]
recid_list = info_per_basket_by_matching_notes[3]
recids_per_basket_by_matching_notes = set(map(int, recid_list.strip(',').split(',')))
if public_search_results.has_key(bskid):
no_public_search_results_per_basket_so_far = public_search_results[bskid][2]
public_search_results[bskid][3] = list(set(public_search_results[bskid][3]).union(recids_per_basket_by_matching_notes))
public_search_results[bskid][2] = len(public_search_results[bskid][3])
total_no_public_search_results += ( public_search_results[bskid][2] - no_public_search_results_per_basket_so_far )
else:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
public_search_results[bskid] = [basket_name, share_rights_notes, len(recids_per_basket_by_matching_notes), list(recids_per_basket_by_matching_notes)]
total_no_public_search_results += len(recids_per_basket_by_matching_notes)
# For every found record: calculate the number of notes
# and the HTML representation of the record.
for bskid in public_search_results.keys():
recids = public_search_results[bskid][3]
number_of_notes_per_record = db.get_number_of_notes_per_record_in_basket(bskid, recids)
records = []
for recid_and_notes in number_of_notes_per_record:
recid = recid_and_notes[0]
number_of_notes = recid_and_notes[1]
if recid < 0:
xml_record = public_external_items_xml_records[recid]
record_html = format_record(None, of='bsr', xml_record=xml_record)
records.append((recid, number_of_notes, record_html))
else:
record_html = format_record(recid, of='bsr', on_the_fly=True)
records.append((recid, number_of_notes, record_html))
public_search_results[bskid][3] = records
### Calculate the search results for all the public baskets ###
if b.startswith("A"):
all_public_search_results = {}
all_public_items = db.get_all_items_in_all_public_baskets(format)
all_public_local_items = all_public_items[0]
all_public_external_items = all_public_items[1]
all_public_external_items_xml_records = {}
for local_info_per_basket in all_public_local_items:
bskid = local_info_per_basket[0]
basket_name = local_info_per_basket[1]
share_rights = local_info_per_basket[2]
recid_list = local_info_per_basket[3]
local_recids_per_basket = intbitset(map(int, recid_list.strip(',').split(',')))
intsec = local_search_results.intersection(local_recids_per_basket)
if intsec:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
all_public_search_results[bskid] = [basket_name, share_rights_notes, len(intsec), list(intsec)]
total_no_all_public_search_results += len(intsec)
for external_info_per_basket in all_public_external_items:
bskid = external_info_per_basket[0]
basket_name = external_info_per_basket[1]
share_rights = external_info_per_basket[2]
recid = external_info_per_basket[3]
value = external_info_per_basket[4]
xml_record = decompress(value)
all_public_external_items_xml_records[recid] = xml_record
text = remove_html_markup(xml_record, remove_escaped_chars_p=False)
text = unescape(text)
#text = text.replace('\n', '')
result = pattern.search(text)
if result:
if all_public_search_results.has_key(bskid):
all_public_search_results[bskid][2] += 1
all_public_search_results[bskid][3].append(recid)
else:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
all_public_search_results[bskid] = [basket_name, share_rights_notes, 1, [recid]]
total_no_all_public_search_results += 1
if n:
all_public_items_by_matching_notes = db.get_all_items_in_all_public_baskets_by_matching_notes(p)
for info_per_basket_by_matching_notes in all_public_items_by_matching_notes:
bskid = info_per_basket_by_matching_notes[0]
basket_name = info_per_basket_by_matching_notes[1]
share_rights = info_per_basket_by_matching_notes[2]
recid_list = info_per_basket_by_matching_notes[3]
recids_per_basket_by_matching_notes = set(map(int, recid_list.strip(',').split(',')))
if all_public_search_results.has_key(bskid):
no_all_public_search_results_per_basket_so_far = all_public_search_results[bskid][2]
all_public_search_results[bskid][3] = list(set(all_public_search_results[bskid][3]).union(recids_per_basket_by_matching_notes))
all_public_search_results[bskid][2] = len(all_public_search_results[bskid][3])
total_no_all_public_search_results += ( all_public_search_results[bskid][2] - no_all_public_search_results_per_basket_so_far )
else:
share_rights_view_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT'])
share_rights_add_notes = check_sufficient_rights(share_rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
share_rights_notes = (share_rights_view_notes, share_rights_add_notes)
all_public_search_results[bskid] = [basket_name, share_rights_notes, len(recids_per_basket_by_matching_notes), list(recids_per_basket_by_matching_notes)]
total_no_all_public_search_results += len(recids_per_basket_by_matching_notes)
# For every found record: calculate the number of notes
# and the HTML representation of the record.
for bskid in all_public_search_results.keys():
recids = all_public_search_results[bskid][3]
number_of_notes_per_record = db.get_number_of_notes_per_record_in_basket(bskid, recids)
records = []
for recid_and_notes in number_of_notes_per_record:
recid = recid_and_notes[0]
number_of_notes = recid_and_notes[1]
if recid < 0:
xml_record = all_public_external_items_xml_records[recid]
record_html = format_record(None, of='bsr', xml_record=xml_record)
records.append((recid, number_of_notes, record_html))
else:
record_html = format_record(recid, of='bsr', on_the_fly=True)
records.append((recid, number_of_notes, record_html))
all_public_search_results[bskid][3] = records
search_results_html = webbasket_templates.tmpl_search_results(personal_search_results,
total_no_personal_search_results,
group_search_results,
total_no_group_search_results,
public_search_results,
total_no_public_search_results,
all_public_search_results,
total_no_all_public_search_results,
ln)
else:
search_results_html = None
search_box = __create_search_box(uid=uid,
category=selected_category,
topic=selected_topic,
grpid=selected_group_id,
p=p,
n=n,
ln=ln)
body = webbasket_templates.tmpl_display(search_box=search_box,
search_results=search_results_html)
body = warnings_html + body
navtrail = create_webbasket_navtrail(uid,
search_baskets=True,
ln=ln)
return (body, navtrail)
def perform_request_write_note(uid,
category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
topic="",
group_id=0,
bskid=0,
recid=0,
cmtid=0,
ln=CFG_SITE_LANG):
"""Display a note writing form
@param uid: user id
@param bskid: basket id
@param recid: record id (comments are on a specific record in a specific basket)
@param cmtid: if provided this comment is a reply to comment cmtid.
@param category: selected category
@param topic: selected topic
@param group_id: selected group id
@param ln: language
"""
_ = gettext_set_language(ln)
optional_params = {}
#warnings_rights = []
warnings_html = ""
if not check_user_can_comment(uid, bskid):
try:
raise InvenioWebBasketWarning(_('You do not have permission to write notes to this item.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
#warnings_rights = ['WRN_WEBBASKET_RESTRICTED_WRITE_NOTES']
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
else:
if cmtid and db.note_belongs_to_item_in_basket_p(cmtid, recid, bskid):
optional_params["Add note"] = db.get_note(cmtid)
optional_params["Reply to"] = cmtid
elif cmtid:
optional_params["Add note"] = ()
try:
raise InvenioWebBasketWarning(_('The note you are quoting does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
optional_params["Warnings"] = exc.message
else:
optional_params["Add note"] = ()
(body, warnings, navtrail) = perform_request_display(uid=uid,
selected_category=category,
selected_topic=topic,
selected_group_id=group_id,
selected_bskid=bskid,
selected_recid=recid,
optional_params=optional_params,
of='hb',
ln=CFG_SITE_LANG)
if not warnings:
body = warnings_html + body
#warnings = warnings_rights
return (body, navtrail)
def perform_request_save_note(uid,
category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
topic="",
group_id=0,
bskid=0,
recid=0,
note_title="",
note_body="",
editor_type='textarea',
date_creation="",
ln=CFG_SITE_LANG,
reply_to=None):
""" Save a given comment if able to.
@param uid: user id (int)
@param bskid: basket id (int)
@param recid: record id (int)
@param title: title of comment (string)
@param text: comment's body (string)
@param ln: language (string)
@param editor_type: the kind of editor/input used for the comment: 'textarea', 'ckeditor'
@param reply_to: the id of the comment we are replying to
"""
_ = gettext_set_language(ln)
optional_params = {}
#warnings_rights = []
warnings_html = ""
if not check_user_can_comment(uid, bskid):
try:
raise InvenioWebBasketWarning(_('You do not have permission to write notes to this item.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
#warnings_rights = exc.message
#warnings_rights = ['WRN_WEBBASKET_RESTRICTED_WRITE_NOTES']
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
else:
if not note_title or \
((not note_body and editor_type != 'ckeditor') or \
(not remove_html_markup(note_body, '').replace('\n', '').replace('\r', '').strip() and editor_type == 'ckeditor')):
optional_params["Incomplete note"] = (note_title, note_body)
try:
raise InvenioWebBasketWarning(_('You must fill in both the subject and the body of the note.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
optional_params["Warnings"] = exc.message
else:
if editor_type == 'ckeditor':
# Here we remove the line feeds introduced by CKEditor (they
# have no meaning for the user) and replace the HTML line
# breaks by linefeeds, so that we are close to an input that
# would be done without the CKEditor. That's much better if a
# reply to a comment is made with a browser that does not
# support CKEditor.
note_body = note_body.replace('\n', '').replace('\r', '').replace('<br />', '\n')
if not(db.save_note(uid, bskid, recid, note_title, note_body, date_creation, reply_to)):
# TODO: The note could not be saved. DB problem?
pass
else:
# TODO: inform about successful annotating.
pass
(body, warnings, navtrail) = perform_request_display(uid=uid,
selected_category=category,
selected_topic=topic,
selected_group_id=group_id,
selected_bskid=bskid,
selected_recid=recid,
optional_params=optional_params,
of='hb',
ln=CFG_SITE_LANG)
if not warnings:
body = warnings_html + body
#warnings = warnings_rights
return (body, navtrail)
def perform_request_delete_note(uid,
category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
topic="",
group_id=0,
bskid=0,
recid=0,
cmtid=0,
ln=CFG_SITE_LANG):
"""Delete comment cmtid on record recid for basket bskid."""
_ = gettext_set_language(ln)
#warnings_notes = []
warnings_html = ""
if not __check_user_can_perform_action(uid, bskid, CFG_WEBBASKET_SHARE_LEVELS['DELCMT']):
try:
raise InvenioWebBasketWarning(_('You do not have permission to delete this note.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
#warnings_notes.append(exc.message)
#warnings_notes.append('WRN_WEBBASKET_RESTRICTED_DELETE_NOTES')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
else:
if cmtid and db.note_belongs_to_item_in_basket_p(cmtid, recid, bskid):
db.delete_note(bskid, recid, cmtid)
else:
try:
raise InvenioWebBasketWarning(_('The note you are deleting does not exist or you do not have access to it.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
#warnings_notes.append(exc.message)
#warnings_notes.append('WRN_WEBBASKET_DELETE_INVALID_NOTE')
warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
(body, warnings, navtrail) = perform_request_display(uid=uid,
selected_category=category,
selected_topic=topic,
selected_group_id=group_id,
selected_bskid=bskid,
selected_recid=recid,
of='hb',
ln=CFG_SITE_LANG)
body = warnings_html + body
#warnings.extend(warnings_notes)
return (body, navtrail)
def perform_request_add(uid,
                        recids=[],
                        category='',
                        bskid=0,
                        colid=0,
                        es_title='',
                        es_desc='',
                        es_url='',
                        note_body='',
                        date_creation='',
                        editor_type='',
                        b='',
                        successful_add=False,
                        copy=False,
                        wait=False,
                        move_from_basket=0,
                        referer='',
                        ln=CFG_SITE_LANG):
    """Add records to baskets.
    @param uid: user id
    @param recids: list of records to add
    @param category: basket category; combined with bskid to build b when b is empty
    @param bskid: id of the target basket
    @param colid: in case of external collections, the id of the collection the
                  records belong to; 0 means local records, -1 an external source
    @param es_title: the title of the external source
    @param es_desc: the description of the external source
    @param es_url: the url of the external source
    @param note_body: optional note to attach to every added item
    @param date_creation: creation date to record for the note
    @param editor_type: widget used for the note ('textarea' or 'ckeditor')
    @param b: washed "category_bskid" POST variable selecting the target basket
    @param successful_add: if True, only render the confirmation page and return
    @param copy: the items are being copied from another basket
    @param wait: validate only; do not actually add the items yet
    @param move_from_basket: instead of creating a new item, move the item with
           the specified recid from another basket
    @param referer: URL of the referring page
    @param ln: language"""
    _ = gettext_set_language(ln)
    # Second entry after a successful add: render the confirmation page only.
    if successful_add:
        body = webbasket_templates.tmpl_add(recids=recids,
                                            category=category,
                                            bskid=bskid,
                                            colid=colid,
                                            successful_add=True,
                                            copy=copy,
                                            referer=referer,
                                            ln=ln)
        #warnings = []
        navtrail = create_webbasket_navtrail(uid,
                                             add_to_basket=True,
                                             ln=ln)
        return (body, navtrail)
    #warnings = []
    warnings_html = ""
    # A single recid may arrive as a scalar from the request; normalize to a list.
    if type(recids) is not list:
        recids = [recids]
    validated_recids = []
    if colid == 0:
        # Local records: keep only existing records the user may view.
        for recid in recids:
            recid = int(recid)
            if recid > 0 and record_exists(recid) == 1:
                validated_recids.append(recid)
            elif recid < 0 and copy:
                # if we are copying a record, colid will always be 0 but we may
                # still get negative recids when it comes to external items.
                # In that case, we just skip the checking and add them directly
                # to the validated_recids.
                validated_recids.append(recid)
        user_info = collect_user_info(uid)
        recids_to_remove = []
        for recid in validated_recids:
            (auth_code, dummy) = check_user_can_view_record(user_info, recid)
            if auth_code:
                # User is not authorized to view record.
                # We should not remove items from the list while we parse it.
                # Better store them in another list and in the end remove them.
                #validated_recids.remove(recid)
                recids_to_remove.append(recid)
                try:
                    raise InvenioWebBasketWarning(_('Sorry, you do not have sufficient rights to add record #%i.') % recid)
                except InvenioWebBasketWarning, exc:
                    register_exception(stream='warning')
                    #warnings.append(exc.message)
                    #warnings.append(('WRN_WEBBASKET_NO_RIGHTS_TO_ADD_THIS_RECORD', recid))
                try:
                    raise InvenioWebBasketWarning(_('Some of the items were not added due to lack of sufficient rights.'))
                except InvenioWebBasketWarning, exc:
                    register_exception(stream='warning')
                    warnings_html = webbasket_templates.tmpl_warnings(exc.message, ln)
                    #warnings_html = webbasket_templates.tmpl_warnings('WRN_WEBBASKET_NO_RIGHTS_TO_ADD_RECORDS', ln)
        for recid in recids_to_remove:
            validated_recids.remove(recid)
    elif colid > 0:
        # External records, no need to validate.
        validated_recids.extend(recids)
    elif colid == -1:
        # External source: title, description and a valid URL are all required.
        es_warnings = []
        if not es_title:
            try:
                raise InvenioWebBasketWarning(_('Please provide a title for the external source.'))
            except InvenioWebBasketWarning, exc:
                register_exception(stream='warning')
                es_warnings.append(exc.message)
        if not es_desc:
            try:
                raise InvenioWebBasketWarning(_('Please provide a description for the external source.'))
            except InvenioWebBasketWarning, exc:
                register_exception(stream='warning')
                es_warnings.append(exc.message)
        if not es_url:
            try:
                raise InvenioWebBasketWarning(_('Please provide a url for the external source.'))
            except InvenioWebBasketWarning, exc:
                register_exception(stream='warning')
                es_warnings.append(exc.message)
        else:
            # Warn with a message specific to the HTTP status class of the failure.
            (is_valid, status, dummy) = url_is_valid(es_url)
            if not is_valid:
                if str(status).startswith('0'):
                    try:
                        raise InvenioWebBasketWarning(_('The url you have provided is not valid.'))
                    except InvenioWebBasketWarning, exc:
                        register_exception(stream='warning')
                        es_warnings.append(exc.message)
                        #es_warnings.append('WRN_WEBBASKET_NO_VALID_URL_0')
                elif str(status).startswith('4'):
                    try:
                        raise InvenioWebBasketWarning(_('The url you have provided is not valid: The request contains bad syntax or cannot be fulfilled.'))
                    except InvenioWebBasketWarning, exc:
                        register_exception(stream='warning')
                        es_warnings.append(exc.message)
                        #es_warnings.append('WRN_WEBBASKET_NO_VALID_URL_4')
                elif str(status).startswith('5'):
                    try:
                        raise InvenioWebBasketWarning(_('The url you have provided is not valid: The server failed to fulfil an apparently valid request.'))
                    except InvenioWebBasketWarning, exc:
                        register_exception(stream='warning')
                        es_warnings.append(exc.message)
                        #es_warnings.append('WRN_WEBBASKET_NO_VALID_URL_5')
            elif not (es_url.startswith("http://") or es_url.startswith("https://")):
                # Valid but scheme-less URL: default to http.
                es_url = "http://" + es_url
        if es_warnings:
            #warnings.extend(es_warnings)
            warnings_html += webbasket_templates.tmpl_warnings(es_warnings, ln)
    if not validated_recids:
        # in case there are no record ids select assume we want to add an
        # external source.
        colid = -1
    # This part of code is under the current circumstances never ran,
    # since if there no validated_recids, colid is set to -1.
    # IDEA: colid should by default (i.e. when not set) be -2 and when local
    # recids are added we should use the 0 value.
    #if not validated_recids and colid >= 0:
    #    warnings.append('WRN_WEBBASKET_NO_RECORD')
    #    body += webbasket_templates.tmpl_warnings(warnings, ln)
    #    if referer and not(referer.find(CFG_SITE_URL) == -1):
    #        body += webbasket_templates.tmpl_back_link(referer, ln)
    #    return (body, warnings)
    if b or (category and bskid):
        # if b was not defined we use category and bskid to construct it.
        if not b:
            b = category + "_" + str(bskid)
        # we extract the category and the bskid from the washed b POST variable
        # or the constracted b variable from category and bskid.
        (category, b_bskid, b_warnings) = wash_b_add(b)
        # if there were warnings it means there was a bad input.
        # Send the warning to the user and return the page.
        if b_warnings:
            #warnings.extend(b_warnings)
            warnings_html += webbasket_templates.tmpl_warnings(b_warnings, ln)
        if not b_warnings:
            (bskid, b_warnings) = wash_bskid(uid, category, b_bskid)
            if b_warnings:
                #warnings.extend(b_warnings)
                warnings_html += webbasket_templates.tmpl_warnings(b_warnings, ln)
            if not b_warnings:
                if not(__check_user_can_perform_action(uid,
                                                       bskid,
                                                       CFG_WEBBASKET_SHARE_LEVELS['ADDITM'])):
                    try:
                        raise InvenioWebBasketWarning(_('Sorry, you do not have sufficient rights on this basket.'))
                    except InvenioWebBasketWarning, exc:
                        register_exception(stream='warning')
                        #warnings.append(exc.message)
                        #warnings.append('WRN_WEBBASKET_NO_RIGHTS')
                        warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
                # To move an item, user needs add and delete permissions
                if move_from_basket > 0 and not(__check_user_can_perform_action(uid,
                                                                                move_from_basket,
                                                                                CFG_WEBBASKET_SHARE_LEVELS['DELITM'])):
                    try:
                        raise InvenioWebBasketWarning(_('Sorry, you do not have sufficient rights on this basket.'))
                    except InvenioWebBasketWarning, exc:
                        register_exception(stream='warning')
                        #warnings.append(exc.message)
                        #warnings.append('WRN_WEBBASKET_NO_RIGHTS')
                        warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
        if not warnings_html:
            # NOTE(review): this warning is registered but never added to
            # warnings_html, so it does not block the add below — confirm intended.
            if ( colid >= 0 and not validated_recids ) or ( colid == -1 and ( not es_title or not es_desc or not es_url ) ):
                try:
                    raise InvenioWebBasketWarning(_('No records to add.'))
                except InvenioWebBasketWarning, exc:
                    register_exception(stream='warning')
                    #warnings.append(exc.message)
        if not warnings_html and not wait:
            if colid == -1:
                # NOTE(review): 'es_title = es_title' is a no-op kept as-is.
                es_title = es_title
                es_desc = nl2br(es_desc)
            if move_from_basket > 0:
                added_items = db.move_to_basket(uid, validated_recids, move_from_basket, bskid)
            else:
                added_items = db.add_to_basket(uid, validated_recids, colid, bskid, es_title, es_desc, es_url)
            if added_items:
                # Attach the optional note to every added item, unless it is empty.
                if (note_body and editor_type != 'ckeditor') or \
                       (editor_type == 'ckeditor' and \
                        remove_html_markup(note_body, '').replace('\n', '').replace('\r', '').strip()):
                    if editor_type == 'ckeditor':
                        # Here we remove the line feeds introduced by CKEditor (they
                        # have no meaning for the user) and replace the HTML line
                        # breaks by linefeeds, so that we are close to an input that
                        # would be done without the CKEditor. That's much better if a
                        # reply to a comment is made with a browser that does not
                        # support CKEditor.
                        note_title = ''
                        note_body = note_body.replace('\n', '').replace('\r', '').replace('<br />', '\n')
                    else:
                        note_title = ''
                    for recid in added_items:
                        if not(db.save_note(uid, bskid, recid, note_title, note_body, date_creation, reply_to=None)):
                            # TODO: The note could not be saved. DB problem?
                            pass
                if colid > 0:
                    format_external_records(added_items, of="xm")
                # Re-enter with successful_add=True to render the confirmation page.
                return perform_request_add(uid=uid,
                                           recids=recids,
                                           category=category,
                                           bskid=bskid,
                                           colid=colid,
                                           successful_add=True,
                                           copy=copy,
                                           referer=referer)
            else:
                try:
                    if move_from_basket > 0 and len(validated_recids) > len(added_items):
                        raise InvenioWebBasketWarning(_('Some items could not be moved. The destination basket already contains those items.'))
                    else:
                        raise InvenioWebBasketWarning(_('Cannot add items to the selected basket. Invalid parameters.'))
                except InvenioWebBasketWarning, exc:
                    register_exception(stream='warning')
                    #warnings.append(exc.message)
                    #warnings.append('WRN_WEBBASKET_INVALID_ADD_TO_PARAMETERS')
                    warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
    # Fall-through: no (valid) target basket yet — show the basket selection form.
    personal_basket_list = db.get_all_user_personal_basket_ids_by_topic(uid)
    group_basket_list = db.get_all_user_group_basket_ids_by_group_with_add_rights(uid)
    if not personal_basket_list and not group_basket_list:
        # First-time user: create a default topic/basket and warn about it.
        bskid = db.create_basket(uid=uid, basket_name=_('Untitled basket'), topic=_('Untitled topic'))
        try:
            raise InvenioWebBasketWarning(_('A default topic and basket have been automatically created. Edit them to rename them as you see fit.'))
        except InvenioWebBasketWarning, exc:
            register_exception(stream='warning')
            #warnings.append(exc.message)
            #warnings.append('WRN_WEBBASKET_DEFAULT_TOPIC_AND_BASKET')
            warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
        if colid >= 0 and validated_recids:
            # Add the validated records directly into the new default basket.
            (body, navtrail) = perform_request_add(uid=uid,
                                                   recids=validated_recids,
                                                   category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                                                   bskid=bskid,
                                                   colid=colid,
                                                   referer=referer,
                                                   ln=ln)
            body = warnings_html + body
            return (body, navtrail)
        else:
            # Refresh the list so it includes the basket just created.
            personal_basket_list = db.get_all_user_personal_basket_ids_by_topic(uid)
    body = webbasket_templates.tmpl_add(recids=recids,
                                        category=category,
                                        bskid=bskid,
                                        colid=colid,
                                        es_title=es_title,
                                        es_desc=es_desc,
                                        es_url=es_url,
                                        note_body=note_body,
                                        personal_basket_list=personal_basket_list,
                                        group_basket_list=group_basket_list,
                                        copy=copy,
                                        move_from_basket=move_from_basket,
                                        referer=referer,
                                        ln=ln)
    body = warnings_html + body
    navtrail = create_webbasket_navtrail(uid,
                                         add_to_basket=True,
                                         ln=ln)
    return (body, navtrail)
def perform_request_delete(uid, bskid, confirmed=0,
category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
selected_topic="", selected_group_id=0,
ln=CFG_SITE_LANG):
"""Delete a given basket.
@param uid: user id (user has to be owner of this basket)
@param bskid: basket id
@param confirmed: if 0 will return a confirmation page; if 1 will delete basket.
@param category: category currently displayed
@param selected_topic: topic currently displayed
@param selected_group_id: if category is group, id of the group currently displayed
@param ln: language"""
_ = gettext_set_language(ln)
body = ''
#warnings = []
if not(db.check_user_owns_baskets(uid, [bskid])):
try:
raise InvenioWebBasketWarning(_('Sorry, you do not have sufficient rights on this basket.'))
except InvenioWebBasketWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBBASKET_NO_RIGHTS',))
return body
if confirmed:
if not db.delete_basket(bskid):
# TODO: The item was not deleted. DB problem?
pass
else:
body = webbasket_templates.tmpl_confirm_delete(bskid,
db.count_subscribers(uid, bskid),
category,
selected_topic, selected_group_id,
ln)
return body
def delete_record(uid, bskid, recid):
    """Remove a record from a basket, provided the user holds delete rights.

    @param uid: user id
    @param bskid: basket id
    @param recid: record id
    """
    can_delete = __check_user_can_perform_action(uid,
                                                 bskid,
                                                 CFG_WEBBASKET_SHARE_LEVELS['DELITM'])
    if can_delete:
        db.delete_item(bskid, recid)
def move_record(uid, bskid, recid, direction):
    """Move a record up or down in a basket (change its score), provided
    the user holds manage rights on the basket.

    @param uid: user id
    @param bskid: basket id
    @param recid: record to move
    @param direction: CFG_WEBBASKET_ACTIONS['UP'] or CFG_WEBBASKET_ACTIONS['DOWN']
    """
    can_manage = __check_user_can_perform_action(uid,
                                                 bskid,
                                                 CFG_WEBBASKET_SHARE_LEVELS['MANAGE'])
    if can_manage:
        db.move_item(bskid, recid, direction)
def perform_request_edit(uid, bskid, topic="", new_name='',
                         new_topic = '', new_topic_name='',
                         groups=[], external='',
                         ln=CFG_SITE_LANG):
    """Management interface for a basket: render the edit form, or — if
    names, groups or external rights are provided — save the new settings.

    @param uid: user id (user has to have sufficient rights on this basket)
    @param bskid: basket id to change rights on
    @param topic: topic currently used (int)
    @param new_name: new name of basket
    @param new_topic: topic in which to move basket (int),
                      new_topic_name must be left blank
    @param new_topic_name: new topic in which to move basket
                           (will overwrite param new_topic)
    @param groups: list of strings formed in this way: group_id + '_' + rights
    @param external: rights for everybody (can be 'NO')
    @param ln: language
    @return: HTML body of the edit form, or '' after saving
    """
    body = ''
    #warnings = []
    # TODO: external rights must be washed, it can only be one of the following:
    # NO, READITM, READCMT, ADDCMT
    _ = gettext_set_language(ln)
    rights = db.get_max_user_rights_on_basket(uid, bskid)
    if rights != CFG_WEBBASKET_SHARE_LEVELS['MANAGE']:
        # Only users with full 'manage' rights may edit the basket.
        try:
            raise InvenioWebBasketWarning(_('Sorry, you do not have sufficient rights on this basket.'))
        except InvenioWebBasketWarning, exc:
            register_exception(stream='warning')
            #warnings.append(exc.message)
            #warnings.append(('WRN_WEBBASKET_NO_RIGHTS',))
        return body
    bsk_name = db.get_basket_name(bskid)
    if not(groups) and not(external) and not(new_name) and not(new_topic) and not(new_topic_name):
        # display interface
        topics = map(lambda x: x[0], db.get_personal_topics_infos(uid))
        groups_rights = db.get_groups_subscribing_to_basket(bskid)
        external_rights = ''
        # Group id 0 encodes the "everybody" (external) rights entry.
        if groups_rights and groups_rights[0][0] == 0:
            external_rights = groups_rights[0][2]
            groups_rights = groups_rights[1:]
        display_delete = db.check_user_owns_baskets(uid, bskid)
        display_general = display_delete
        if isGuestUser(uid):
            display_sharing = 0
        else:
            display_sharing = 1
        body = webbasket_templates.tmpl_edit(bskid=bskid, bsk_name=bsk_name,
                                             display_general=display_general,
                                             topics=topics, topic=topic,
                                             display_delete=display_delete,
                                             display_sharing=display_sharing,
                                             groups_rights=groups_rights,
                                             external_rights=external_rights,
                                             ln=ln)
    else:
        # Save the submitted settings.
        out_groups = {}
        if len(groups):
            for group in groups:
                (group_id, group_rights) = group.split('_')
                out_groups[group_id] = group_rights
        # Key '0' carries the rights granted to everybody (external rights).
        out_groups['0'] = external
        if not(isGuestUser(uid)):
            db.update_rights(bskid, out_groups)
        if new_name != bsk_name:
            db.rename_basket(bskid, new_name)
        if new_topic_name:
            db.move_baskets_to_topic(uid, bskid, new_topic_name)
        elif not (new_topic == "-1" or new_topic == topic):
            # Moving to an existing topic is only allowed for the basket owner.
            if db.check_user_owns_baskets(uid, bskid):
                topics = map(lambda x: x[0], db.get_personal_topics_infos(uid))
                if new_topic in topics:
                    new_topic_name = new_topic
                    db.move_baskets_to_topic(uid, bskid, new_topic_name)
                else:
                    # TODO: inform the admin
                    #errors.append(('ERR_WEBBASKET_DB_ERROR'))
                    pass
            else:
                topic = ""
                #warnings.append(('ERR_WEBBASKET_NOT_OWNER'))
    return body
def perform_request_edit_topic(uid, topic='', new_name='', ln=CFG_SITE_LANG):
    """Show the topic-editing form, or rename the topic when a new name
    was submitted.

    @param uid: user id
    @param topic: topic to be edited
    @param new_name: new name of the topic ('' to display the form)
    @param ln: language
    @return: HTML body of the form, or '' after a rename
    """
    if new_name:
        # Only touch the database when the (escaped) name actually differs.
        if cgi.escape(new_name, True) != cgi.escape(topic, True):
            db.rename_topic(uid, topic, new_name)
        return ''
    # Display interface; general and delete sections are always shown here.
    return webbasket_templates.tmpl_edit_topic(display_general=True,
                                               topic=topic,
                                               display_delete=True,
                                               ln=ln)
def perform_request_add_group(uid, bskid, topic="", group_id=0, ln=CFG_SITE_LANG):
    """If group_id is specified, share basket bskid with this group with
    READITM rights; else return a page for selection of a group.

    @param uid: user id (selection only of groups user is member of)
    @param bskid: basket id
    @param topic: topic currently displayed
    @param group_id: id of group to share basket to
    @param ln: language
    @return: HTML body of the selection page, or '' when the basket was shared
    """
    # Initialise body: the original left it unbound on the sharing branch,
    # so the final 'return body' raised a NameError whenever group_id was set.
    body = ''
    if group_id:
        db.share_basket_with_group(bskid,
                                   group_id,
                                   CFG_WEBBASKET_SHARE_LEVELS['READITM'])
    else:
        groups = db.get_groups_user_member_of(uid)
        body = webbasket_templates.tmpl_add_group(bskid, topic, groups, ln)
    return body
def perform_request_create_basket(req, uid,
                                  new_basket_name='',
                                  new_topic_name='', create_in_topic="-1",
                                  topic="-1",
                                  recids=[],
                                  colid=-1,
                                  es_title='',
                                  es_desc='',
                                  es_url='',
                                  copy=False,
                                  move_from_basket=0,
                                  referer='',
                                  ln=CFG_SITE_LANG):
    """If new_basket_name and topic infos are given create a basket and
    return a (bskid, topic) tuple, else return a body (HTML string) with
    warnings and the basket creation form.
    @param req: request object for obtaining URL of the referring page
    @param uid: user id (int)
    @param new_basket_name: name of the basket to create (str)
    @param new_topic_name: name of new topic to create new basket in (str)
    @param create_in_topic: identification number of topic to create new basket in (int)
    @param topic: topic to preselect on the creation form.
    @param recids: record ids to add to the new basket (list)
    @param colid: collection id for external items (int)
    @param es_title, es_desc, es_url: external source metadata (str)
    @param copy: whether the records are being copied (bool)
    @param move_from_basket: source basket id when moving records (int)
    @param referer: URL of the referring page (str)
    @param ln: language
    """
    warnings = []
    warnings_html = ""
    _ = gettext_set_language(ln)
    if new_basket_name and (new_topic_name or create_in_topic != "-1"):
        # A new topic name takes precedence over an existing topic choice.
        new_topic_name = new_topic_name.strip()
        if new_topic_name:
            topic = new_topic_name
        else:
            topic = create_in_topic
        bskid = db.create_basket(uid, new_basket_name, topic)
        return (bskid, topic)
    else:
        local_referer = get_referer(req)  # URL of the referring page
        url = CFG_SITE_URL + '/yourbaskets/create_basket'
        # Only emit validation warnings when the user came from the
        # creation form itself (idiom fix: startswith instead of
        # "import string; string.find(...) == 0").
        if local_referer.startswith(url):
            if not new_basket_name:
                try:
                    raise InvenioWebBasketWarning(_('Please provide a name for the new basket.'))
                except InvenioWebBasketWarning as exc:
                    register_exception(stream='warning', req=req)
                    warnings.append(exc.message)
            if (not new_topic_name and create_in_topic == "-1"):
                try:
                    raise InvenioWebBasketWarning(_('Please select an existing topic or create a new one.'))
                except InvenioWebBasketWarning as exc:
                    register_exception(stream='warning', req=req)
                    warnings.append(exc.message)
        topics = [row[0] for row in db.get_personal_topics_infos(uid)]
        if topic in topics:
            create_in_topic = topic
        body = webbasket_templates.tmpl_create_basket(new_basket_name,
                                                      new_topic_name,
                                                      create_in_topic,
                                                      topics,
                                                      recids,
                                                      colid,
                                                      es_title,
                                                      es_desc,
                                                      es_url,
                                                      copy,
                                                      move_from_basket,
                                                      referer,
                                                      ln)
        if warnings:
            warnings_html += webbasket_templates.tmpl_warnings(warnings, ln)
            body = warnings_html + body
        return body
def perform_request_subscribe(uid,
                              bskid,
                              ln=CFG_SITE_LANG):
    """Subscribe the user to the given public basket.
    Return an HTML string with any warnings (empty on success)."""
    _ = gettext_set_language(ln)
    warnings_html = ""
    if not db.is_basket_public(bskid):
        # Basket is missing or restricted.
        try:
            raise InvenioWebBasketWarning(_('The selected public basket does not exist or you do not have access to it.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
    elif not db.subscribe(uid, bskid):
        # Basket is public but the subscription could not be recorded.
        try:
            raise InvenioWebBasketWarning(_('You cannot subscribe to this basket, you are the either owner or you have already subscribed.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
    return warnings_html
def perform_request_unsubscribe(uid,
                                bskid,
                                ln=CFG_SITE_LANG):
    """Unsubscribe the user from the given public basket.
    Return an HTML string with any warnings (empty on success)."""
    _ = gettext_set_language(ln)
    warnings_html = ""
    if not db.is_basket_public(bskid):
        # Basket is missing or restricted.
        try:
            raise InvenioWebBasketWarning(_('The selected public basket does not exist or you do not have access to it.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
    elif not db.unsubscribe(uid, bskid):
        # Basket is public but the unsubscription could not be recorded.
        try:
            raise InvenioWebBasketWarning(_('You cannot unsubscribe from this basket, you are the either owner or you have already unsubscribed.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            warnings_html += webbasket_templates.tmpl_warnings(exc.message, ln)
    return warnings_html
def check_user_can_comment(uid, bskid):
    """Private function: return 1 if the user may add comments to the
    basket, 0 otherwise."""
    levels = CFG_WEBBASKET_SHARE_LEVELS_ORDERED
    user_rights = db.get_max_user_rights_on_basket(uid, bskid)
    if user_rights and \
       levels.index(user_rights) >= levels.index(CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']):
        return 1
    return 0
def __check_user_can_perform_action(uid, bskid, rights):
    """Private function: return 1 if the user's maximum rights on the
    basket are at least the given required rights, 0 otherwise."""
    levels = CFG_WEBBASKET_SHARE_LEVELS_ORDERED
    required = rights
    user_rights = db.get_max_user_rights_on_basket(uid, bskid)
    if user_rights and levels.index(user_rights) >= levels.index(required):
        return 1
    return 0
def check_sufficient_rights(rights_user_has, rights_needed):
    """Private function: truthy when rights_user_has ranks at least as
    high as rights_needed in the ordered share levels; 0 when either
    value is not a known share level."""
    ordered = CFG_WEBBASKET_SHARE_LEVELS_ORDERED
    try:
        return ordered.index(rights_user_has) >= ordered.index(rights_needed)
    except ValueError:
        # Unknown right name: treat as insufficient.
        return 0
def can_add_notes_to_public_basket_p(bskid):
    """Private function: True if notes can be added to the given public
    basket, False otherwise."""
    ordered = CFG_WEBBASKET_SHARE_LEVELS_ORDERED
    public_rights = db.get_rights_on_public_basket(bskid)
    if not public_rights:
        return False
    min_right = CFG_WEBBASKET_SHARE_LEVELS['ADDCMT']
    return ordered.index(public_rights[0][0]) >= ordered.index(min_right)
def create_guest_warning_box(ln=CFG_SITE_LANG):
    """Return an HTML warning box telling guests to log into the system."""
    warning_box = webbasket_templates.tmpl_create_guest_warning_box(ln)
    return warning_box
def create_personal_baskets_selection_box(uid,
                                          html_select_box_name='baskets',
                                          selected_bskid=None,
                                          ln=CFG_SITE_LANG):
    """Return an HTML selection box over the user's personal baskets only.
    @param uid: user id
    @param html_select_box_name: name attribute used in the HTML form
    @param selected_bskid: basket currently selected
    @param ln: language
    """
    personal_baskets = db.get_all_personal_baskets_names(uid)
    return webbasket_templates.tmpl_personal_baskets_selection_box(
        personal_baskets,
        html_select_box_name,
        selected_bskid,
        ln)
def create_basket_navtrail(uid,
                           category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                           topic="", group=0,
                           bskid=0, ln=CFG_SITE_LANG):
    """Display navtrail for basket navigation.
    @param uid: user id (int)
    @param category: selected category (see CFG_WEBBASKET_CATEGORIES)
    @param topic: selected topic if personal baskets
    @param group: selected group id for displaying (int)
    @param bskid: basket id (int)
    @param ln: language"""
    _ = gettext_set_language(ln)
    out = ''
    if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']:
        category_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                        (CFG_SITE_URL,
                         'category=' + category + '&amp;ln=' + ln,
                         _("Personal baskets"))
        out += category_html
        topics = [row[0] for row in db.get_personal_topics_infos(uid)]
        if topic in topics:
            topic_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                         (CFG_SITE_URL,
                          'category=' + category + '&amp;topic=' + \
                          urllib.quote(topic) + '&amp;ln=' + ln,
                          cgi.escape(topic))
            out += topic_html
            if bskid:
                basket = db.get_public_basket_infos(bskid)
                if basket:
                    basket_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                                  (CFG_SITE_URL,
                                   'category=' + category + '&amp;topic=' + \
                                   urllib.quote(topic) + '&amp;ln=' + ln + '#bsk' + str(bskid),
                                   cgi.escape(basket[1]))
                    out += basket_html
    elif category == CFG_WEBBASKET_CATEGORIES['GROUP']:
        category_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                        (CFG_SITE_URL,
                         'category=' + category + '&amp;ln=' + ln,
                         _("Group baskets"))
        out += category_html
        groups = db.get_group_infos(uid)
        if group:
            groups = [row for row in groups if row[0] == group]
        if len(groups):
            group_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                         (CFG_SITE_URL,
                          'category=' + category + '&amp;group=' + \
                          str(group) + '&amp;ln=' + ln,
                          cgi.escape(groups[0][1]))
            out += group_html
            if bskid:
                basket = db.get_public_basket_infos(bskid)
                if basket:
                    basket_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                                  (CFG_SITE_URL,
                                   'category=' + category + '&amp;group=' + \
                                   str(group) + '&amp;ln=' + ln + '#bsk' + str(bskid),
                                   cgi.escape(basket[1]))
                    out += basket_html
    elif category == CFG_WEBBASKET_CATEGORIES['EXTERNAL']:
        category_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                        (CFG_SITE_URL,
                         'category=' + category + '&amp;ln=' + ln,
                         _("Others' baskets"))
        out += category_html
        if bskid:
            basket = db.get_public_basket_infos(bskid)
            if basket:
                # Bug fix: the format string ended with '">' and had only
                # two %s placeholders for a 3-element argument tuple, which
                # raised "not all arguments converted" at runtime. Restored
                # the '>%s</a>' tail used by the other branches.
                basket_html = """ &gt; <a class="navtrail" href="%s/yourbaskets/display?%s">%s</a>""" % \
                              (CFG_SITE_URL,
                               'category=' + category + '&amp;ln=' + ln + \
                               '#bsk' + str(bskid),
                               cgi.escape(basket[1]))
                out += basket_html
    return out
def create_webbasket_navtrail(uid,
                              category="",
                              topic="",
                              group=0,
                              bskid=0,
                              public_basket=False,
                              search_baskets=False,
                              add_to_basket=False,
                              ln=CFG_SITE_LANG):
    """Create the navtrail for navigation within WebBasket.
    @param uid: user id (int)
    @param category: selected category (see CFG_WEBBASKET_CATEGORIES)
    @param topic: selected topic (str)
    @param group: selected group (int)
    @param bskid: selected basket id (int)
    @param public_basket: True when browsing public baskets
    @param search_baskets: True when on the basket search page
    @param add_to_basket: True when on the "add to basket" page
    @param ln: language"""
    _ = gettext_set_language(ln)
    out = """<a class="navtrail" href="%s/youraccount/display?ln=%s">%s</a>""" % \
          (CFG_SITE_URL, ln, _("Your Account"))
    out += " &gt; "
    out += """<a class="navtrail" href="%s/yourbaskets/display?ln=%s">%s</a>""" % \
           (CFG_SITE_URL, ln, _("Your Baskets"))
    if public_basket:
        out += " &gt; "
        out += """<a class="navtrail" href="%s/yourbaskets/list_public_baskets?ln=%s">%s</a>""" % \
               (CFG_SITE_URL, ln, _("List of public baskets"))
        if bskid:
            basket = db.get_basket_name(bskid)
            if basket:
                out += " &gt; "
                out += """<a class="navtrail" href="%s/yourbaskets/display_public?bskid=%i&amp;ln=%s">%s</a>""" % \
                       (CFG_SITE_URL, bskid, ln, cgi.escape(basket, True))
    elif search_baskets:
        out += " &gt; "
        out += """<a class="navtrail" href="%s/yourbaskets/search?ln=%s">%s</a>""" % \
               (CFG_SITE_URL, ln, _("Search baskets"))
    elif add_to_basket:
        out += " &gt; "
        out += """<a class="navtrail" href="%s/yourbaskets/add?ln=%s">%s</a>""" % \
               (CFG_SITE_URL, ln, _("Add to basket"))
    else:
        if category == CFG_WEBBASKET_CATEGORIES['PRIVATE']:
            out += " &gt; "
            out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;ln=%s">%s</a>""" % \
                   (CFG_SITE_URL, CFG_WEBBASKET_CATEGORIES['PRIVATE'], ln, _("Personal baskets"))
            if topic:
                topic_names = [row[0] for row in db.get_personal_topics_infos(uid)]
                if topic in topic_names:
                    out += " &gt; "
                    out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;topic=%s&amp;ln=%s">%s</a>""" % \
                           (CFG_SITE_URL, CFG_WEBBASKET_CATEGORIES['PRIVATE'], urllib.quote(topic), ln, cgi.escape(topic, True))
                    if bskid:
                        basket = db.get_basket_name(bskid)
                        if basket:
                            out += " &gt; "
                            out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;topic=%s&amp;bskid=%i&amp;ln=%s">%s</a>""" % \
                                   (CFG_SITE_URL, CFG_WEBBASKET_CATEGORIES['PRIVATE'], urllib.quote(topic), bskid, ln, cgi.escape(basket, True))
        elif category == CFG_WEBBASKET_CATEGORIES['GROUP']:
            out += " &gt; "
            out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;ln=%s">%s</a>""" % \
                   (CFG_SITE_URL, CFG_WEBBASKET_CATEGORIES['GROUP'], ln, _("Group baskets"))
            if group:
                # NOTE(review): only the FIRST group row is considered here
                # (x[0] == group and x[1]); rows after a non-matching first
                # row are ignored. Preserved as-is — verify against callers.
                group_names = [row[0] == group and row[1] for row in db.get_group_infos(uid)]
                if group_names and group_names[0]:
                    out += " &gt; "
                    out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;group=%i&amp;ln=%s">%s</a>""" % \
                           (CFG_SITE_URL, CFG_WEBBASKET_CATEGORIES['GROUP'], group, ln, cgi.escape(group_names[0], True))
                    if bskid:
                        basket = db.get_basket_name(bskid)
                        if basket:
                            out += " &gt; "
                            # Bug fix: the basket link passed the group id as
                            # "topic=%s"; the correct query parameter is "group".
                            out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;group=%i&amp;bskid=%i&amp;ln=%s">%s</a>""" % \
                                   (CFG_SITE_URL, CFG_WEBBASKET_CATEGORIES['GROUP'], group, bskid, ln, cgi.escape(basket, True))
        elif category == CFG_WEBBASKET_CATEGORIES['EXTERNAL']:
            out += " &gt; "
            out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;ln=%s">%s</a>""" % \
                   (CFG_SITE_URL, category, ln, _("Public baskets"))
            if bskid:
                basket = db.get_basket_name(bskid)
                if basket:
                    out += " &gt; "
                    # Bug fix: the link passed the (meaningless) group id as
                    # "topic=%s" for external baskets; dropped the bogus
                    # parameter — the category and bskid identify the basket.
                    out += """<a class="navtrail" href="%s/yourbaskets/display?category=%s&amp;bskid=%i&amp;ln=%s">%s</a>""" % \
                           (CFG_SITE_URL, category, bskid, ln, cgi.escape(basket, True))
    return out
def account_list_baskets(uid, ln=CFG_SITE_LANG):
    """Return the baskets summary sentence shown on the account page."""
    _ = gettext_set_language(ln)
    personal, group, external = db.count_baskets(uid)
    link = '<a href="%s">%s</a>'
    base_url = CFG_SITE_URL + '/yourbaskets/display?category=%s&amp;ln=' + ln

    personal_text = personal
    if personal:
        personal_text = link % (base_url % CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                                personal_text)

    group_text = group
    if group:
        group_text = link % (base_url % CFG_WEBBASKET_CATEGORIES['GROUP'],
                             group_text)

    # The external count is always linked: to the display page when the
    # user follows public baskets, otherwise to the public baskets list.
    if external:
        external_url = base_url % CFG_WEBBASKET_CATEGORIES['EXTERNAL']
    else:
        external_url = CFG_SITE_URL + '/yourbaskets/list_public_baskets?ln=' + ln
    external_text = link % (external_url, external)

    return _("You have %(x_nb_perso)s personal baskets and are subscribed to %(x_nb_group)s group baskets and %(x_nb_public)s other users public baskets.") % \
        {'x_nb_perso': personal_text,
         'x_nb_group': group_text,
         'x_nb_public': external_text}
def page_start(req, of='xm'):
    """Set the content type and send the headers for the page."""
    if not of.startswith('x'):
        # Assuming HTML by default.
        req.content_type = "text/html"
        req.send_http_header()
        return
    # XML family of output formats; 'xr' is RSS.
    req.content_type = of == 'xr' and "application/rss+xml" or "text/xml"
    req.send_http_header()
    req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
    print_records_prologue(req, of)
def page_end(req, of='xm'):
    """Print the page footer (XML epilogue for XML output formats only)."""
    if not of.startswith('x'):
        return
    print_records_epilogue(req, of)
def perform_request_export_xml(body):
    """Export an XML representation of the selected baskets/items."""
    exported = webbasket_templates.tmpl_export_xml(body)
    return exported
################################
### External items functions ###
################################
def format_external_records(recids, of='hb'):
    """Given a list of external records' recids, this function returns a list of tuples
    with each recid and the actual formatted record using the selected output format.
    It also stores the formatted record in the database for future use."""
    # TODO: add a returnp variable to control whether we actually want anything
    # to be returned or not. For example when we just want to store the xml
    # formatted records for newly added items.
    # TODO: take care of external urls. Invent an xml format for them.
    # NOTE: this function is meant to format external records from other
    # libraries. It's not meant to handle custom external sources like urls
    # submitted manually by the user. These items are directly formatted and
    # stored by the add_to_basket database function.
    formatted_records = []
    # Accept a single recid as well as a list of recids.
    if type(recids) is not list:
        recids = [recids]
    # First serve whatever is already cached in the database as MARCXML.
    existing_xml_formatted_records = db.get_external_records(recids, "xm")
    for existing_xml_formatted_record in existing_xml_formatted_records:
        # Row layout per db.get_external_records: (colid, id, compressed xml).
        xml_record = decompress(existing_xml_formatted_record[2])
        xml_record_id = existing_xml_formatted_record[1]
        xml_record_colid = existing_xml_formatted_record[0]
        # External record ids are stored negated; remove the ones we already
        # have so only the missing ones are fetched below.
        recids.remove(-xml_record_id)
        if of == "hb":
            # NOTE(review): colid <= 0 records (presumably manual/custom
            # sources) are skipped for HTML-brief — confirm intended.
            if xml_record_colid > 0:
                htmlbrief_record = format_record(None, of, xml_record=xml_record)
                formatted_records.append((xml_record_id, htmlbrief_record))
        elif of != "hb":
            #formatted_records.append((xml_record_id, xml_record))
            # Reformat the cached XML on the fly into the requested format.
            formatted_records.append((xml_record_id, format_record([], of, xml_record=xml_record, on_the_fly=True)))
            # formatted_records.append((xml_record_id, repr(xml_record)))
    # Cache the freshly produced HTML-brief renderings.
    if formatted_records and of == "hb":
        db.store_external_records(formatted_records, of)
    # Fetch whatever was not cached, grouped per hosted collection.
    records_grouped_by_collection = db.get_external_records_by_collection(recids)
    if records_grouped_by_collection:
        for records in records_grouped_by_collection:
            colid = records[2]
            if colid:
                external_records = fetch_and_store_external_records(records, of)
                formatted_records.extend(external_records)
    return formatted_records
def fetch_and_store_external_records(records, of="hb"):
    """Function that fetches the formatted records for one collection and stores them
    into the database. It also calculates and stores the original external url for each
    record."""
    results = []
    formatted_records = []
    if of == 'xm':
        # Patterns used to clean fetched MARCXML: strip controlfields and
        # collapse blank lines. Compiled once, used in the loop below.
        re_controlfield = re.compile(r'<controlfield\b[^>]*>.*?</controlfield>', re.DOTALL + re.MULTILINE + re.IGNORECASE)
        re_blankline = re.compile(r'\s*\n', re.DOTALL + re.MULTILINE + re.IGNORECASE)
    # the locally saved external ids
    local_ext_ids = records[0].split(",")
    # the locally saved original external ids
    external_ids = records[1].split(",")
    collection_name = get_collection_name_by_id(records[2])
    collection_engine_set = select_hosted_search_engines(collection_name)
    collection_engine = collection_engine_set.pop()
    # Compute and persist the original external URL for each record.
    external_ids_urls = collection_engine.build_record_urls(external_ids)
    external_urls = [external_id_url[1] for external_id_url in external_ids_urls]
    #external_urls_dict = {}
    #for (local_id, url) in zip(local_ext_ids, external_urls):
    #external_urls_dict[local_id] = url
    #db.store_external_urls(external_urls_dict)
    db.store_external_urls(zip(local_ext_ids, external_urls))
    # One asynchronous request fetches all records of this collection.
    url = collection_engine.build_search_url(None, req_args=external_ids)
    pagegetters = [HTTPAsyncPageGetter(url)]
    def finished(pagegetter, dummy_data, dummy_time):
        """Function to be called when a page has been downloaded."""
        results.append(pagegetter)
    finished_list = async_download(pagegetters, finish_function=finished, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT)
    if finished_list[0]:
        # Download succeeded: parse the feed and extract one formatted
        # record per original external id.
        collection_engine.parser.parse_and_get_results(results[0].data, feedonly=True)
        (dummy, parsed_results_dict) = collection_engine.parser.parse_and_extract_records(of=of)
        for (local_ext_id, external_id) in zip(local_ext_ids, external_ids):
            formatted_record = parsed_results_dict[external_id]
            if of == 'xm':
                formatted_record = re_controlfield.sub('', formatted_record)
                formatted_record = re_blankline.sub('\n', formatted_record)
            formatted_records.append((int(local_ext_id), formatted_record))
        # Persist the fetched renderings; note the timeout case below is
        # deliberately NOT stored.
        db.store_external_records(formatted_records, of)
    else:
        # Download timed out: return a placeholder per record.
        for (local_ext_id, external_id) in zip(local_ext_ids, external_ids):
            formatted_records.append((int(local_ext_id), "There was a timeout when fetching the record."))
    return formatted_records
###############################
### Miscellaneous functions ###
###############################
def url_is_valid(url):
    """Return (True, status, reason) if the url is valid or
    (False, status, reason) if not.

    1xx/2xx/3xx responses count as valid; 4xx/5xx count as invalid only
    for the common error codes (400, 404, 500) and are otherwise treated
    as valid (the host answered).
    """
    common_errors_list = [400, 404, 500]
    url_tuple = urlsplit(url)
    if not url_tuple[0]:
        # No scheme given: assume plain http.
        url = "http://" + url
        url_tuple = urlsplit(url)
    # Bug fix: the old guard tested "no scheme AND no netloc", but after
    # prefixing "http://" the scheme is always set, so the guard never
    # fired and we tried to connect to an empty host. Reject when there
    # is no network location.
    if not url_tuple[1]:
        return (False, 0, "Not Valid")
    # HTTPConnection had the timeout parameter introduced in python 2.6;
    # to use a custom timeout pass it as an extra argument to this function.
    conn = HTTPConnection(url_tuple[1])
    try:
        try:
            conn.request("GET", url_tuple[2])
            response = conn.getresponse()
            status = response.status
            reason = response.reason
        except Exception:
            # Connection/protocol failure of any kind: not a valid URL.
            return (False, 0, "Not Valid")
    finally:
        # Always release the connection (was previously leaked).
        conn.close()
    if str(status).startswith(('1', '2', '3')):
        return (True, status, reason)
    if status in common_errors_list:
        return (False, status, reason)
    # 4xx/5xx outside the common errors (and any exotic status): the host
    # answered, so keep the historical behavior of treating it as valid.
    return (True, status, reason)
def nl2br(text):
    """Replace newlines (\n) found in text with HTML line breaks (<br />)."""
    return text.replace('\n', '<br />')
def wash_b_search(b):
    """Wash the b GET variable for the search interface.
    Return (category, topic_or_group, warning)."""
    _ = gettext_set_language(CFG_SITE_LANG)
    parts = b.split('_', 1)
    b_category = parts[0].upper()
    valid_categories = list(CFG_WEBBASKET_CATEGORIES.values()) + ['']
    if b_category not in valid_categories:
        try:
            raise InvenioWebBasketWarning(_('The category you have selected does not exist. Please select a valid category.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            return ("", "", exc.message)
        #return ("", "", ['WRN_WEBBASKET_INVALID_CATEGORY'])
    if len(parts) == 2 and b_category in (CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                                          CFG_WEBBASKET_CATEGORIES['GROUP']):
        return (b_category, parts[1], None)
    # TODO: send a warning when the user has sent a second argument
    # specifying a category other than PRIVATE or GROUP
    return (b_category, "", None)
def wash_b_add(b):
    """Wash the b POST variable for the add interface.
    Return (category, basket_id_str, warning)."""
    _ = gettext_set_language(CFG_SITE_LANG)
    parts = b.split('_', 1)
    b_category = parts[0].upper()
    allowed = (CFG_WEBBASKET_CATEGORIES['PRIVATE'], CFG_WEBBASKET_CATEGORIES['GROUP'])
    if b_category in allowed and len(parts) == 2 and parts[1]:
        return (b_category, parts[1], None)
    try:
        raise InvenioWebBasketWarning(_('Cannot add items to the selected basket. Invalid parameters.'))
    except InvenioWebBasketWarning as exc:
        register_exception(stream='warning')
        return ("", "", exc.message)
def wash_category(category):
    """Wash the category.
    Return (category, None) on success or ("", warning) on error."""
    _ = gettext_set_language(CFG_SITE_LANG)
    category = category.upper()
    valid_categories = CFG_WEBBASKET_CATEGORIES.values()
    valid_categories.append('')
    if category not in valid_categories:
        try:
            raise InvenioWebBasketWarning(_('The category you have selected does not exist. Please select a valid category.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            # Bug fix: the error path returned a 3-tuple ("", "", msg)
            # while the success path returns a 2-tuple, breaking callers
            # that unpack (category, warning). Return a 2-tuple, matching
            # the legacy contract below.
            return ("", exc.message)
        #return ("", ['WRN_WEBBASKET_INVALID_CATEGORY'])
    return (category, None)
def wash_topic(uid, topic):
    """Wash the topic.
    Return (topic, None) on success or ("", warning) on error."""
    _ = gettext_set_language(CFG_SITE_LANG)
    if not db.is_topic_valid(uid, topic):
        try:
            raise InvenioWebBasketWarning(_('The selected topic does not exist or you do not have access to it.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            # Bug fix: the error path returned a 3-tuple ("", "", msg)
            # while the success path returns a 2-tuple, breaking callers
            # that unpack (topic, warning). Return a 2-tuple, matching
            # the legacy contract below.
            return ("", exc.message)
        #return ("", ['WRN_WEBBASKET_INVALID_OR_RESTRICTED_TOPIC'])
    return (topic, None)
def wash_group(uid, group):
    """Wash the group id.
    Return (group_id, None) on success or (0, warning) on error."""
    _ = gettext_set_language(CFG_SITE_LANG)
    if group.isdigit() and db.is_group_valid(uid, group):
        return (int(group), None)
    try:
        raise InvenioWebBasketWarning(_('The selected group does not exist or you do not have access to it.'))
    except InvenioWebBasketWarning as exc:
        register_exception(stream='warning')
        return (0, exc.message)
    #return (0, ['WRN_WEBBASKET_INVALID_OR_RESTRICTED_GROUP'])
def wash_bskid(uid, category, bskid):
    """Wash the bskid based on its category. This function expects a washed
    category, either for personal or for group baskets.
    Return (bskid, None) on success or (0, warning) on error."""
    _ = gettext_set_language(CFG_SITE_LANG)

    def _invalid_basket():
        # Shared error path: register the warning and build the error tuple.
        try:
            raise InvenioWebBasketWarning(_('The selected basket does not exist or you do not have access to it.'))
        except InvenioWebBasketWarning as exc:
            register_exception(stream='warning')
            return (0, exc.message)

    if not bskid.isdigit():
        return _invalid_basket()
    bskid = int(bskid)
    if category == CFG_WEBBASKET_CATEGORIES['PRIVATE'] and \
       not db.is_personal_basket_valid(uid, bskid):
        return _invalid_basket()
    if category == CFG_WEBBASKET_CATEGORIES['GROUP'] and \
       not db.is_group_basket_valid(uid, bskid):
        return _invalid_basket()
    return (bskid, None)
def wash_of(of):
    """Wash the output format.
    Return (of, None) when accepted, otherwise ('hb', warning)."""
    _ = gettext_set_language(CFG_SITE_LANG)
    accepted_formats = ['hb', 'xm', 'hx', 'xd', 'xe', 'xn', 'xw', 'xr', 'xp']
    if of in accepted_formats:
        return (of, None)
    try:
        raise InvenioWebBasketWarning(_('The selected output format is not available or is invalid.'))
    except InvenioWebBasketWarning as exc:
        register_exception(stream='warning')
        # Fall back to the HTML-brief format.
        return ('hb', exc.message)
def __create_search_box(uid,
                        category="",
                        topic="",
                        grpid=0,
                        p="",
                        n=0,
                        ln=CFG_SITE_LANG):
    """Private function.
    Build the basket search box and return its HTML code."""
    user_topics = db.get_all_user_topics(uid)
    user_groups = db.get_all_user_groups(uid)
    nb_public_baskets = db.count_external_baskets(uid)
    return webbasket_templates.tmpl_create_search_box(category,
                                                      topic,
                                                      grpid,
                                                      user_topics,
                                                      user_groups,
                                                      nb_public_baskets,
                                                      p,
                                                      n,
                                                      ln=ln)
diff --git a/invenio/legacy/webbasket/webinterface.py b/invenio/legacy/webbasket/webinterface.py
index 325e7de1c..ed0dcb7e0 100644
--- a/invenio/legacy/webbasket/webinterface.py
+++ b/invenio/legacy/webbasket/webinterface.py
@@ -1,1647 +1,1647 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebBasket Web Interface."""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
from invenio.utils import apache
import os
import cgi
import urllib
from invenio.config import CFG_SITE_SECURE_URL, \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS, \
CFG_SITE_SECURE_URL, CFG_PREFIX, CFG_SITE_LANG
from invenio.base.i18n import gettext_set_language
from invenio.webpage import page
from invenio.webuser import getUid, page_not_authorized, isGuestUser
from invenio.webbasket import \
check_user_can_comment, \
check_sufficient_rights, \
perform_request_display, \
perform_request_search, \
create_guest_warning_box, \
create_basket_navtrail, \
perform_request_write_note, \
perform_request_save_note, \
perform_request_delete_note, \
perform_request_add_group, \
perform_request_edit, \
perform_request_edit_topic, \
perform_request_list_public_baskets, \
perform_request_unsubscribe, \
perform_request_subscribe, \
perform_request_display_public, \
perform_request_write_public_note, \
perform_request_save_public_note, \
delete_record, \
move_record, \
perform_request_add, \
perform_request_create_basket, \
perform_request_delete, \
wash_topic, \
wash_group, \
perform_request_export_xml, \
page_start, \
page_end
from invenio.webbasket_config import CFG_WEBBASKET_CATEGORIES, \
CFG_WEBBASKET_ACTIONS, \
CFG_WEBBASKET_SHARE_LEVELS
from invenio.webbasket_dblayer import get_basket_name, \
get_max_user_rights_on_basket
from invenio.utils.url import get_referer, redirect_to_url, make_canonical_urlargd
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.webstat import register_customevent
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webuser import collect_user_info
from invenio.modules.comments.api import check_user_can_attach_file_to_comments
from invenio.access_control_engine import acc_authorize_action
from invenio.htmlutils import is_html_text_editor_installed
from invenio.ckeditor_invenio_connector import process_CKEditor_upload, send_response
from invenio.bibdocfile import stream_file
class WebInterfaceBasketCommentsFiles(WebInterfaceDirectory):
    """Handle upload and access to files for comments in WebBasket.
    The upload is currently only available through the CKEditor.
    """

    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for getting
        and putting attachments) Eg:
        /yourbaskets/attachments/get/31/652/5/file/myfile.pdf
        /yourbaskets/attachments/get/31/552/5/image/myfigure.png
        bskid/recid/uid/
        /yourbaskets/attachments/put/31/550/
        bskid/recid
        """
        if component == 'get' and len(path) > 4:
            bskid = path[0]  # Basket id
            recid = path[1]  # Record id
            uid = path[2]  # uid of the submitter
            file_type = path[3]  # file, image, flash or media (as
                                 # defined by CKEditor)
            if file_type in ['file', 'image', 'flash', 'media']:
                file_name = '/'.join(path[4:])  # the filename

                def answer_get(req, form):
                    """Accessing files attached to comments."""
                    # Inject the URL components into the form so _get()
                    # can treat them as regular request arguments.
                    form['file'] = file_name
                    form['type'] = file_type
                    form['uid'] = uid
                    form['recid'] = recid
                    form['bskid'] = bskid
                    return self._get(req, form)

                return answer_get, []
        elif component == 'put' and len(path) > 1:
            bskid = path[0]  # Basket id
            recid = path[1]  # Record id

            def answer_put(req, form):
                """Attaching file to a comment."""
                form['recid'] = recid
                form['bskid'] = bskid
                return self._put(req, form)

            return answer_put, []
        # All other cases: file not found
        return None, []

    def _get(self, req, form):
        """
        Returns a file attached to a comment.

        A file is attached to a comment of a record of a basket, by a
        user (who is the author of the comment), and is of a certain
        type (file, image, etc). Therefore these 5 values are part of
        the URL. Eg:
        CFG_SITE_SECURE_URL/yourbaskets/attachments/get/31/91/5/file/myfile.pdf
        bskid/recid/uid
        """
        argd = wash_urlargd(form, {'file': (str, None),
                                   'type': (str, None),
                                   'uid': (int, 0),
                                   'bskid': (int, 0),
                                   'recid': (int, 0)})
        _ = gettext_set_language(argd['ln'])
        # Can user view this basket & record & comment, i.e. can user
        # access its attachments?
        #uid = getUid(req)
        user_info = collect_user_info(req)
        rights = get_max_user_rights_on_basket(argd['uid'], argd['bskid'])
        if not user_info['precached_usebaskets']:
            return page_not_authorized(req, "../", \
                                       text = _("You are not authorized to use baskets."))
        if user_info['email'] == 'guest':
            # Ask to login
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                     make_canonical_urlargd({'ln' : argd['ln'], 'referer' : \
                                             CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target)
        elif not(check_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READITM'])):
            return page_not_authorized(req, "../", \
                                       text = _("You are not authorized to view this attachment"))
        if not argd['file'] is None:
            # Prepare path to file on disk. Normalize the path so that
            # ../ and other dangerous components are removed.
            path = os.path.abspath(CFG_PREFIX + '/var/data/baskets/comments/' + \
                                   str(argd['bskid']) + '/' + str(argd['recid']) + '/' + \
                                   str(argd['uid']) + '/' + argd['type'] + '/' + \
                                   argd['file'])
            # Check that we are really accessing attachements
            # directory, for the declared basket and record.
            if path.startswith(CFG_PREFIX + '/var/data/baskets/comments/' + \
                               str(argd['bskid']) + '/' + str(argd['recid'])) and \
                   os.path.exists(path):
                return stream_file(req, path)
        # Send error 404 in all other cases
        return apache.HTTP_NOT_FOUND

    def _put(self, req, form):
        """
        Process requests received from CKEditor to upload files, etc.

        URL eg:
        CFG_SITE_SECURE_URL/yourbaskets/attachments/put/31/91/
        bskid/recid/
        """
        if not is_html_text_editor_installed():
            return
        argd = wash_urlargd(form, {'bskid': (int, 0),
                                   'recid': (int, 0)})
        uid = getUid(req)
        # URL where the file can be fetched after upload
        user_files_path = '%(CFG_SITE_SECURE_URL)s/yourbaskets/attachments/get/%(bskid)s/%(recid)i/%(uid)s' % \
                          {'uid': uid,
                           'recid': argd['recid'],
                           'bskid': argd['bskid'],
                           'CFG_SITE_SECURE_URL': CFG_SITE_SECURE_URL}
        # Path to directory where uploaded files are saved
        user_files_absolute_path = '%(CFG_PREFIX)s/var/data/baskets/comments/%(bskid)s/%(recid)s/%(uid)s' % \
                                   {'uid': uid,
                                    'recid': argd['recid'],
                                    'bskid': argd['bskid'],
                                    'CFG_PREFIX': CFG_PREFIX}
        # Check that user can
        # 1. is logged in
        # 2. comment records of this basket (to simplify, we use
        #    WebComment function to check this, even if it is not
        #    entirely adequate)
        # 3. attach files
        user_info = collect_user_info(req)
        (auth_code, dummy) = check_user_can_attach_file_to_comments(user_info, argd['recid'])
        fileurl = ''
        callback_function = ''
        if user_info['email'] == 'guest':
            # 1. User is guest: must login prior to upload
            # Bug fix: the message was assigned to an unused local "data",
            # leaving "msg" unbound and crashing send_response() below.
            msg = 'Please login before uploading file.'
        elif not user_info['precached_usebaskets']:
            msg = 'Sorry, you are not allowed to use WebBasket'
        elif not check_user_can_comment(uid, argd['bskid']):
            # 2. User cannot edit comment of this basket
            msg = 'Sorry, you are not allowed to submit files'
        elif auth_code:
            # 3. User cannot submit
            msg = 'Sorry, you are not allowed to submit files.'
        else:
            # Process the upload and get the response
            (msg, uploaded_file_path, filename, fileurl, callback_function) = \
                  process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path,
                                          recid=argd['recid'])
        send_response(req, msg, fileurl, callback_function)
class WebInterfaceYourBasketsPages(WebInterfaceDirectory):
    """Defines the set of /yourbaskets pages."""

    # URL components exported under /yourbaskets/ — each name maps to
    # the handler method of the same name on this class ('' -> index).
    _exports = ['',
                'display_item',
                'display',
                'search',
                'write_note',
                'save_note',
                'delete_note',
                'add',
                'delete',
                'modify',
                'edit',
                'edit_topic',
                'create_basket',
                'display_public',
                'list_public_baskets',
                'subscribe',
                'unsubscribe',
                'write_public_note',
                'save_public_note',
                'attachments']

    # Sub-directory handler serving /yourbaskets/attachments/* (upload
    # and download of files attached to basket comments via CKEditor).
    attachments = WebInterfaceBasketCommentsFiles()
def index(self, req, dummy):
    """Entry point of /yourbaskets: forward to the display page,
    preserving the original query string."""
    target = '%s/yourbaskets/display?%s' % (CFG_SITE_SECURE_URL, req.args)
    redirect_to_url(req, target)
def display_item(self, req, dummy):
    """Legacy URL: forward old /yourbaskets/display_item requests to
    the current display page, preserving the query string."""
    target = '%s/yourbaskets/display?%s' % (CFG_SITE_SECURE_URL, req.args)
    redirect_to_url(req, target)
def display(self, req, form):
    """Display the basket interface (one basket, or the overview).

    Handles authorization, guest redirection to the login page,
    webstat event registration, and non-HTML output formats
    (e.g. XML/RSS when 'of' starts with 'x').
    """
    argd = wash_urlargd(form, {'category':
                               (str, CFG_WEBBASKET_CATEGORIES['PRIVATE']),
                               'topic': (str, ""),
                               'group': (int, 0),
                               'bskid': (int, 0),
                               'recid': (int, 0),
                               'bsk_to_sort': (int, 0),
                               'sort_by_title': (str, ""),
                               'sort_by_date': (str, ""),
                               'of': (str, "hb"),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/display",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            # Send guests to login, with a referer pointing back to
            # this very display request.
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/display%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    (body, dummy, navtrail) = perform_request_display(uid=uid,
                                                      selected_category=argd['category'],
                                                      selected_topic=argd['topic'],
                                                      selected_group_id=argd['group'],
                                                      selected_bskid=argd['bskid'],
                                                      selected_recid=argd['recid'],
                                                      of=argd['of'],
                                                      ln=argd['ln'])
    if isGuestUser(uid):
        body = create_guest_warning_box(argd['ln']) + body
    # register event in webstat
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["display", "", user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    rssurl = CFG_SITE_SECURE_URL + "/rss"
    if argd['of'] != 'hb':
        # Non-HTML output: stream the raw body and stop here.
        page_start(req, of=argd['of'])
        if argd['of'].startswith('x'):
            req.write(body)
        page_end(req, of=argd['of'])
        return
    elif argd['bskid']:
        # A specific basket is shown: point the RSS link at its feed.
        rssurl = "%s/yourbaskets/display?category=%s&amp;topic=%s&amp;group=%i&amp;bskid=%i&amp;of=xr" % \
                 (CFG_SITE_SECURE_URL,
                  argd['category'],
                  urllib.quote(argd['topic']),
                  argd['group'],
                  argd['bskid'])
    return page(title = _("Display baskets"),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1,
                rssurl=rssurl)
def search(self, req, form):
    """Search-baskets interface.

    'p' is the pattern, 'b' the field to search, 'n' a notes flag;
    category/topic/group restrict the searched baskets.
    """
    argd = wash_urlargd(form, {'category': (str, ""),
                               'topic': (str, ""),
                               'group': (int, 0),
                               'p': (str, ""),
                               'b': (str, ""),
                               'n': (int, 0),
                               'of': (str, "hb"),
                               'verbose': (int, 0),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/search",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            # Guests go through login, then back to this search.
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/search%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    (body, navtrail) = perform_request_search(uid=uid,
                                              selected_category=argd['category'],
                                              selected_topic=argd['topic'],
                                              selected_group_id=argd['group'],
                                              p=argd['p'],
                                              b=argd['b'],
                                              n=argd['n'],
                                              # format=argd['of'],
                                              ln=argd['ln'])
    # register event in webstat
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["search", "", user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _("Search baskets"),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1)
def write_note(self, req, form):
    """Write a note (comment) on a basket item — interface only;
    saving is done by save_note().

    'cmtid' is the id of the note being replied to, if any.
    """
    argd = wash_urlargd(form, {'category': (str, CFG_WEBBASKET_CATEGORIES['PRIVATE']),
                               'topic': (str, ""),
                               'group': (int, 0),
                               'bskid': (int, 0),
                               'recid': (int, 0),
                               'cmtid': (int, 0),
                               'of' : (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/write_note",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/write_note%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    (body, navtrail) = perform_request_write_note(uid=uid,
                                                  category=argd['category'],
                                                  topic=argd['topic'],
                                                  group_id=argd['group'],
                                                  bskid=argd['bskid'],
                                                  recid=argd['recid'],
                                                  cmtid=argd['cmtid'],
                                                  ln=argd['ln'])
    # register event in webstat
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["write_note", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _("Add a note"),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                secure_page_p=1)
def save_note(self, req, form):
    """Save a note (comment) on a record in a basket, then display
    the item with its notes.

    'reply_to' is the id of the note being answered (0 = new thread).
    """
    argd = wash_urlargd(form, {'category': (str, CFG_WEBBASKET_CATEGORIES['PRIVATE']),
                               'topic': (str, ""),
                               'group': (int, 0),
                               'bskid': (int, 0),
                               'recid': (int, 0),
                               'note_title': (str, ""),
                               'note_body': (str, ""),
                               'date_creation': (str, ""),
                               'editor_type': (str, ""),
                               'of': (str, ''),
                               'ln': (str, CFG_SITE_LANG),
                               'reply_to': (int, 0)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/save_note",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/save_note%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    (body, navtrail) = perform_request_save_note(uid=uid,
                                                 category=argd['category'],
                                                 topic=argd['topic'],
                                                 group_id=argd['group'],
                                                 bskid=argd['bskid'],
                                                 recid=argd['recid'],
                                                 note_title=argd['note_title'],
                                                 note_body=argd['note_body'],
                                                 date_creation=argd['date_creation'],
                                                 editor_type=argd['editor_type'],
                                                 ln=argd['ln'],
                                                 reply_to=argd['reply_to'])
    # TODO: do not stat event if save was not succussful
    # register event in webstat
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["save_note", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _("Display item and notes"),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1)
def delete_note(self, req, form):
    """Delete a note (comment) attached to a record in a basket.

    @param bskid: id of basket (int)
    @param recid: id of record (int)
    @param cmtid: id of comment (int)
    @param category: category (see webbasket_config) (str)
    @param topic: topic currently displayed (str)
    @param group: id of group baskets currently displayed (int)
    @param ln: language"""
    argd = wash_urlargd(form, {'category': (str, CFG_WEBBASKET_CATEGORIES['PRIVATE']),
                               'topic': (str, ""),
                               'group': (int, 0),
                               'bskid': (int, 0),
                               'recid': (int, 0),
                               'cmtid': (int, 0),
                               'of' : (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/delete_note",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            # BUGFIX: redirect guests to the login page.  This used to
            # point at "/youraccount/delete_note" (a copy-paste error);
            # every sibling handler redirects to "/youraccount/login".
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/display%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    (body, navtrail) = perform_request_delete_note(uid=uid,
                                                   category=argd['category'],
                                                   topic=argd['topic'],
                                                   group_id=argd['group'],
                                                   bskid=argd['bskid'],
                                                   recid=argd['recid'],
                                                   cmtid=argd['cmtid'],
                                                   ln=argd['ln'])
    # TODO: do not stat event if delete was not succussful
    # register event in webstat
    # (removed a redundant second collect_user_info(req) call here:
    # user_info is already collected above)
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["delete_note", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _("Display item and notes"),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1)
def add(self, req, form):
    """Add records to baskets.

    @param recid: list of records to add
    @param colid: in case of external collections, the id of the collection the records belong to
    @param bskid: basket to add records to; if not provided,
                  will return a page where user can select baskets
    @param referer: URL of the referring page
    @param copy: copy (vs. move) the records (int flag)
    @param move_from_basket: id of the basket records are moved from
    @param ln: language"""
    # TODO: apply a maximum limit of items (100) that can be added to a basket
    # at once. Also see the build_search_url function of websearch_..._searcher.py
    # for the "rg" GET variable.
    argd = wash_urlargd(form, {'recid': (list, []),
                               'category': (str, ""),
                               'bskid': (int, 0),
                               'colid': (int, 0),
                               'es_title': (str, ""),
                               'es_desc': (str, ""),
                               'es_url': (str, ""),
                               'note_body': (str, ""),
                               'date_creation': (str, ""),
                               'editor_type': (str, ""),
                               'b': (str, ""),
                               'copy': (int, 0),
                               'move_from_basket': (int, 0),
                               'wait': (int, 0),
                               'referer': (str, ""),
                               'of': (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/add",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/add%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    # Fall back to the HTTP referer when none was passed explicitly.
    if not argd['referer']:
        argd['referer'] = get_referer(req)
    (body, navtrail) = perform_request_add(uid=uid,
                                           recids=argd['recid'],
                                           colid=argd['colid'],
                                           bskid=argd['bskid'],
                                           es_title=argd['es_title'],
                                           es_desc=argd['es_desc'],
                                           es_url=argd['es_url'],
                                           note_body=argd['note_body'],
                                           date_creation=argd['date_creation'],
                                           editor_type=argd['editor_type'],
                                           category=argd['category'],
                                           b=argd['b'],
                                           copy=argd['copy'],
                                           move_from_basket=argd['move_from_basket'],
                                           wait=argd['wait'],
                                           referer=argd['referer'],
                                           ln=argd['ln'])
    if isGuestUser(uid):
        body = create_guest_warning_box(argd['ln']) + body
    # register event in webstat
    bskid = argd['bskid']
    basket_str = "%s (%s)" % (get_basket_name(bskid), bskid)
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["add", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _('Add to basket'),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1)
def delete(self, req, form):
    """Delete-basket interface.

    Without 'confirmed' a confirmation page is shown; with it, the
    basket is deleted and the user is redirected back to the display
    page of the surrounding category/topic/group.
    """
    argd = wash_urlargd(form, {'bskid' : (int, -1),
                               'confirmed' : (int, 0),
                               'category' : (str, CFG_WEBBASKET_CATEGORIES['PRIVATE']),
                               'topic' : (str, ""),
                               'group' : (int, 0),
                               'of' : (str, ''),
                               'ln' : (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/delete",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/delete%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    body=perform_request_delete(uid=uid,
                                bskid=argd['bskid'],
                                confirmed=argd['confirmed'],
                                category=argd['category'],
                                selected_topic=argd['topic'],
                                selected_group_id=argd['group'],
                                ln=argd['ln'])
    if argd['confirmed']:
        # Deletion done: sanitize the topic/group we redirect into.
        if argd['category'] == CFG_WEBBASKET_CATEGORIES['PRIVATE']:
            argd['topic'] = wash_topic(uid, argd['topic'])[0]
        elif argd['category'] == CFG_WEBBASKET_CATEGORIES['GROUP']:
            argd['group'] = wash_group(uid, argd['group'])[0]
        url = """%s/yourbaskets/display?category=%s&topic=%s&group=%i&ln=%s""" % \
              (CFG_SITE_SECURE_URL,
               argd['category'],
               urllib.quote(argd['topic']),
               argd['group'],
               argd['ln'])
        redirect_to_url(req, url)
    else:
        # Not confirmed yet: render the confirmation page.
        navtrail = '<a class="navtrail" href="%s/youraccount/display?ln=%s">'\
                   '%s</a>'
        navtrail %= (CFG_SITE_SECURE_URL, argd['ln'], _("Your Account"))
        navtrail_end = create_basket_navtrail(uid=uid,
                                              category=argd['category'],
                                              topic=argd['topic'],
                                              group=argd['group'],
                                              bskid=argd['bskid'],
                                              ln=argd['ln'])
        if isGuestUser(uid):
            body = create_guest_warning_box(argd['ln']) + body
        # register event in webstat
        basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
        if user_info['email']:
            user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
        else:
            user_str = ""
        try:
            register_customevent("baskets", ["delete", basket_str, user_str])
        except:
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
        return page(title = _("Delete a basket"),
                    body = body,
                    navtrail = navtrail + navtrail_end,
                    uid = uid,
                    lastupdated = __lastupdated__,
                    language = argd['ln'],
                    req = req,
                    navmenuid = 'yourbaskets',
                    of = argd['of'],
                    secure_page_p=1)
def modify(self, req, form):
    """Modify basket content interface (reorder, suppress record, etc.).

    'action' is one of CFG_WEBBASKET_ACTIONS (DELETE/UP/DOWN/COPY/MOVE);
    DELETE/UP/DOWN redirect straight back to the display page, while
    COPY/MOVE render a basket-selection page.
    """
    argd = wash_urlargd(form, {'action': (str, ""),
                               'bskid': (int, -1),
                               'recid': (int, 0),
                               'category': (str, CFG_WEBBASKET_CATEGORIES['PRIVATE']),
                               'topic': (str, ""),
                               'group': (int, 0),
                               'of' : (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/modify",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/modify%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    # URL to go back to after simple in-place actions.
    url = CFG_SITE_SECURE_URL
    url += '/yourbaskets/display?category=%s&topic=%s&group=%i&bskid=%i&ln=%s' % \
           (argd['category'], urllib.quote(argd['topic']), argd['group'], argd['bskid'], argd['ln'])
    if argd['action'] == CFG_WEBBASKET_ACTIONS['DELETE']:
        delete_record(uid, argd['bskid'], argd['recid'])
        redirect_to_url(req, url)
    elif argd['action'] == CFG_WEBBASKET_ACTIONS['UP']:
        move_record(uid, argd['bskid'], argd['recid'], argd['action'])
        redirect_to_url(req, url)
    elif argd['action'] == CFG_WEBBASKET_ACTIONS['DOWN']:
        move_record(uid, argd['bskid'], argd['recid'], argd['action'])
        redirect_to_url(req, url)
    elif argd['action'] == CFG_WEBBASKET_ACTIONS['COPY'] or \
         argd['action'] == CFG_WEBBASKET_ACTIONS['MOVE']:
        if(argd['action'] == CFG_WEBBASKET_ACTIONS['MOVE']):
            title = _("Move record to basket")
            from_bsk = argd['bskid']
        else:
            title = _("Copy record to basket")
            from_bsk = 0
        referer = get_referer(req)
        (body, navtrail) = perform_request_add(uid=uid,
                                               recids=argd['recid'],
                                               copy=True,
                                               move_from_basket=from_bsk,
                                               referer=referer,
                                               ln=argd['ln'])
        if isGuestUser(uid):
            body = create_guest_warning_box(argd['ln']) + body
    else:
        # Unknown action: render an empty page.
        title = ''
        body = ''
        # warnings = [('WRN_WEBBASKET_UNDEFINED_ACTION',)]
    navtrail = '<a class="navtrail" href="%s/youraccount/display?ln=%s">'\
               '%s</a>'
    navtrail %= (CFG_SITE_SECURE_URL, argd['ln'], _("Your Account"))
    navtrail_end = create_basket_navtrail(uid=uid,
                                          category=argd['category'],
                                          topic=argd['topic'],
                                          group=argd['group'],
                                          bskid=argd['bskid'],
                                          ln=argd['ln'])
    # register event in webstat
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["modify", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = title,
                body = body,
                navtrail = navtrail + navtrail_end,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                secure_page_p=1)
def edit(self, req, form):
    """Edit-basket interface (rename, move to another topic, manage
    group/external sharing rights).

    The submitted buttons ('submit', 'cancel', 'delete', 'add_group',
    'group_cancel') select the branch; cancel/delete/submit redirect
    away, the others render the edit form again.
    """
    argd = wash_urlargd(form, {'bskid': (int, 0),
                               'groups': (list, []),
                               'topic': (str, ""),
                               'add_group': (str, ""),
                               'group_cancel': (str, ""),
                               'submit': (str, ""),
                               'cancel': (str, ""),
                               'delete': (str, ""),
                               'new_name': (str, ""),
                               'new_topic': (str, ""),
                               'new_topic_name': (str, ""),
                               'new_group': (str, ""),
                               'external': (str, ""),
                               'of' : (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/edit",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/edit%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    _ = gettext_set_language(argd['ln'])
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    if argd['cancel']:
        # Abandon edits: back to the private-basket display.
        url = CFG_SITE_SECURE_URL + '/yourbaskets/display?category=%s&topic=%s&ln=%s'
        url %= (CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                urllib.quote(argd['topic']),
                argd['ln'])
        redirect_to_url(req, url)
    elif argd['delete']:
        # Hand over to the delete-basket confirmation page.
        url = CFG_SITE_SECURE_URL
        url += '/yourbaskets/delete?bskid=%i&category=%s&topic=%s&ln=%s' % \
               (argd['bskid'],
                CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                urllib.quote(argd['topic']),
                argd['ln'])
        redirect_to_url(req, url)
    elif argd['add_group'] and not(argd['new_group']):
        # Step 1 of adding a group: show the group-selection form.
        body = perform_request_add_group(uid=uid,
                                         bskid=argd['bskid'],
                                         topic=argd['topic'],
                                         ln=argd['ln'])
        # warnings = []
    elif (argd['add_group'] and argd['new_group']) or argd['group_cancel']:
        # Step 2: actually add the chosen group, then re-render edit.
        if argd['add_group']:
            perform_request_add_group(uid=uid,
                                      bskid=argd['bskid'],
                                      topic=argd['topic'],
                                      group_id=argd['new_group'],
                                      ln=argd['ln'])
        body = perform_request_edit(uid=uid,
                                    bskid=argd['bskid'],
                                    topic=argd['topic'],
                                    ln=argd['ln'])
    elif argd['submit']:
        # Apply the edits and redirect to the (possibly new) topic.
        body = perform_request_edit(uid=uid,
                                    bskid=argd['bskid'],
                                    topic=argd['topic'],
                                    new_name=argd['new_name'],
                                    new_topic=argd['new_topic'],
                                    new_topic_name=argd['new_topic_name'],
                                    groups=argd['groups'],
                                    external=argd['external'],
                                    ln=argd['ln'])
        if argd['new_topic'] != "-1":
            argd['topic'] = argd['new_topic']
        url = CFG_SITE_SECURE_URL + '/yourbaskets/display?category=%s&topic=%s&ln=%s' % \
              (CFG_WEBBASKET_CATEGORIES['PRIVATE'],
               urllib.quote(argd['topic']),
               argd['ln'])
        redirect_to_url(req, url)
    else:
        # Plain GET: show the edit form.
        body = perform_request_edit(uid=uid,
                                    bskid=argd['bskid'],
                                    topic=argd['topic'],
                                    ln=argd['ln'])
    navtrail = '<a class="navtrail" href="%s/youraccount/display?ln=%s">'\
               '%s</a>'
    navtrail %= (CFG_SITE_SECURE_URL, argd['ln'], _("Your Account"))
    navtrail_end = create_basket_navtrail(
        uid=uid,
        category=CFG_WEBBASKET_CATEGORIES['PRIVATE'],
        topic=argd['topic'],
        group=0,
        bskid=argd['bskid'],
        ln=argd['ln'])
    if isGuestUser(uid):
        body = create_guest_warning_box(argd['ln']) + body
    # register event in webstat
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["edit", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _("Edit basket"),
                body = body,
                navtrail = navtrail + navtrail_end,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                secure_page_p=1)
def edit_topic(self, req, form):
    """Edit-topic interface (rename or delete a topic).

    The submitted buttons ('submit', 'cancel', 'delete') select the
    branch; submit/cancel/delete redirect away, otherwise the edit
    form is rendered.
    """
    # BUGFIX: 'bskid' is referenced by the 'delete' branch below but
    # was missing from the washed arguments, causing a KeyError there.
    argd = wash_urlargd(form, {'topic': (str, ""),
                               'bskid': (int, 0),
                               'submit': (str, ""),
                               'cancel': (str, ""),
                               'delete': (str, ""),
                               'new_name': (str, ""),
                               'of' : (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/edit",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/edit_topic%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    _ = gettext_set_language(argd['ln'])
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    if argd['cancel']:
        url = CFG_SITE_SECURE_URL + '/yourbaskets/display?category=%s&ln=%s'
        url %= (CFG_WEBBASKET_CATEGORIES['PRIVATE'], argd['ln'])
        redirect_to_url(req, url)
    elif argd['delete']:
        url = CFG_SITE_SECURE_URL
        url += '/yourbaskets/delete?bskid=%i&category=%s&topic=%s&ln=%s' % \
               (argd['bskid'],
                CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                urllib.quote(argd['topic']),
                argd['ln'])
        redirect_to_url(req, url)
    elif argd['submit']:
        # perform_request_edit_topic applies the rename side effect;
        # its returned body is discarded because we redirect.
        body = perform_request_edit_topic(uid=uid,
                                          topic=argd['topic'],
                                          new_name=argd['new_name'],
                                          ln=argd['ln'])
        url = CFG_SITE_SECURE_URL + '/yourbaskets/display?category=%s&ln=%s' % \
              (CFG_WEBBASKET_CATEGORIES['PRIVATE'], argd['ln'])
        redirect_to_url(req, url)
    else:
        body = perform_request_edit_topic(uid=uid,
                                          topic=argd['topic'],
                                          ln=argd['ln'])
    navtrail = '<a class="navtrail" href="%s/youraccount/display?ln=%s">'\
               '%s</a>'
    navtrail %= (CFG_SITE_SECURE_URL, argd['ln'], _("Your Account"))
    navtrail_end = ""
    if isGuestUser(uid):
        body = create_guest_warning_box(argd['ln']) + body
    return page(title = _("Edit topic"),
                body = body,
                navtrail = navtrail + navtrail_end,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                secure_page_p=1)
def create_basket(self, req, form):
    """Create-basket interface.

    When a new basket name (and a topic, new or existing) is posted,
    the basket is created and the user is redirected either to the
    add-records step (when records or an external item were supplied)
    or to the basket display; otherwise the creation form is shown.
    """
    argd = wash_urlargd(form, {'new_basket_name': (str, ""),
                               'new_topic_name' : (str, ""),
                               'create_in_topic': (str, "-1"),
                               'topic' : (str, ""),
                               'recid' : (list, []),
                               'colid' : (int, -1),
                               'es_title' : (str, ''),
                               'es_desc' : (str, ''),
                               'es_url' : (str, ''),
                               'copy' : (int, 0),
                               'move_from_basket':(int, 0),
                               'referer' : (str, ''),
                               'of' : (str, ''),
                               'ln' : (str, CFG_SITE_LANG)})
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/create_basket",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/create_basket%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    _ = gettext_set_language(argd['ln'])
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
                                   text = _("You are not authorized to use baskets."))
    if argd['new_basket_name'] and \
           (argd['new_topic_name'] or argd['create_in_topic'] != "-1"):
        # All data present: create the basket right away.
        (bskid, topic) = perform_request_create_basket(
            req,
            uid=uid,
            new_basket_name=argd['new_basket_name'],
            new_topic_name=argd['new_topic_name'],
            create_in_topic=argd['create_in_topic'],
            recids=argd['recid'],
            colid=argd['colid'],
            es_title=argd['es_title'],
            es_desc=argd['es_desc'],
            es_url=argd['es_url'],
            copy=argd['copy'],
            move_from_basket=argd['move_from_basket'],
            referer=argd['referer'],
            ln=argd['ln'])
        # register event in webstat
        basket_str = "%s ()" % argd['new_basket_name']
        if user_info['email']:
            user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
        else:
            user_str = ""
        try:
            register_customevent("baskets", ["create_basket", basket_str, user_str])
        except:
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
        if ( argd['recid'] and argd['colid'] >= 0 ):
            # Records were selected: continue to the add step.
            url = CFG_SITE_SECURE_URL + '/yourbaskets/add?category=%s&copy=%i&referer=%s&bskid=%i&colid=%i&move_from_basket=%i&recid=%s&wait=1&ln=%s'
            url %= (CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                    argd['copy'],
                    urllib.quote(argd['referer']),
                    bskid,
                    argd['colid'],
                    argd['move_from_basket'],
                    '&recid='.join(str(recid) for recid in argd['recid']),
                    argd['ln'])
        elif ( argd['es_title'] and argd['es_desc'] and argd['es_url'] and argd['colid'] == -1 ):
            # Adding NEW external record - this does not need 'move_from_basket' data
            url = CFG_SITE_SECURE_URL + '/yourbaskets/add?category=%s&bskid=%i&colid=%i&es_title=%s&es_desc=%s&es_url=%s&wait=1&ln=%s'
            url %= (CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                    bskid,
                    argd['colid'],
                    urllib.quote(argd['es_title']),
                    urllib.quote(argd['es_desc']),
                    urllib.quote(argd['es_url']),
                    argd['ln'])
        else:
            # Nothing to add: just show the new basket.
            url = CFG_SITE_SECURE_URL + '/yourbaskets/display?category=%s&topic=%s&ln=%s'
            url %= (CFG_WEBBASKET_CATEGORIES['PRIVATE'],
                    urllib.quote(topic),
                    argd['ln'])
        redirect_to_url(req, url)
    else:
        # Missing data: render the basket-creation form.
        body = perform_request_create_basket(req,
                                             uid=uid,
                                             new_basket_name=argd['new_basket_name'],
                                             new_topic_name=argd['new_topic_name'],
                                             create_in_topic=argd['create_in_topic'],
                                             topic=argd['topic'],
                                             recids=argd['recid'],
                                             colid=argd['colid'],
                                             es_title=argd['es_title'],
                                             es_desc=argd['es_desc'],
                                             es_url=argd['es_url'],
                                             copy=argd['copy'],
                                             move_from_basket=argd['move_from_basket'],
                                             referer=argd['referer'],
                                             ln=argd['ln'])
        navtrail = '<a class="navtrail" href="%s/youraccount/'\
                   'display?ln=%s">%s</a>'
        navtrail %= (CFG_SITE_SECURE_URL, argd['ln'], _("Your Account"))
        if isGuestUser(uid):
            body = create_guest_warning_box(argd['ln']) + body
        return page(title = _("Create basket"),
                    body = body,
                    navtrail = navtrail,
                    uid = uid,
                    lastupdated = __lastupdated__,
                    language = argd['ln'],
                    req = req,
                    navmenuid = 'yourbaskets',
                    of = argd['of'],
                    secure_page_p=1)
def display_public(self, req, form):
    """Display a public basket (or, without 'bskid', the list of
    public baskets).

    Supports non-HTML output formats ('of' starting with 'x'), and
    exposes an RSS link to the basket's xr feed.
    """
    argd = wash_urlargd(form, {'bskid': (int, 0),
                               'recid': (int, 0),
                               'of': (str, "hb"),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/display",
                                   navmenuid = 'yourbaskets')
    user_info = collect_user_info(req)
    if not argd['bskid']:
        # No basket selected: fall back to the public-baskets list.
        (body, navtrail) = perform_request_list_public_baskets(uid)
        title = _('List of public baskets')
        # register event in webstat
        if user_info['email']:
            user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
        else:
            user_str = ""
        try:
            register_customevent("baskets", ["list_public_baskets", "", user_str])
        except:
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    else:
        (body, dummy, navtrail) = perform_request_display_public(uid=uid,
                                                                 selected_bskid=argd['bskid'],
                                                                 selected_recid=argd['recid'],
                                                                 of=argd['of'],
                                                                 ln=argd['ln'])
        title = _('Public basket')
        # register event in webstat
        basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
        if user_info['email']:
            user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
        else:
            user_str = ""
        try:
            register_customevent("baskets", ["display_public", basket_str, user_str])
        except:
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    rssurl = CFG_SITE_SECURE_URL + "/rss"
    if argd['of'] != 'hb':
        # Non-HTML output: stream the raw body and stop here.
        page_start(req, of=argd['of'])
        if argd['of'].startswith('x'):
            req.write(body)
        page_end(req, of=argd['of'])
        return
    elif argd['bskid']:
        rssurl = "%s/yourbaskets/display_public?&amp;bskid=%i&amp;of=xr" % \
                 (CFG_SITE_SECURE_URL,
                  argd['bskid'])
    return page(title = title,
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1,
                rssurl=rssurl)
def list_public_baskets(self, req, form):
    """List of public baskets interface."""
    argd = wash_urlargd(form, {'limit': (int, 1),
                               'sort': (str, 'name'),
                               'asc': (int, 1),
                               'of': (str, ''),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    # Unknown session, or site completely closed: bail out.
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        return page_not_authorized(req, "../yourbaskets/list_public_baskets",
                                   navmenuid='yourbaskets')
    user_info = collect_user_info(req)
    # Only users authorized for webstat administration see the
    # number-of-views column.
    auth_code = acc_authorize_action(user_info, 'runwebstatadmin')[0]
    show_nb_views = not auth_code
    body, navtrail = perform_request_list_public_baskets(uid,
                                                         argd['limit'],
                                                         argd['sort'],
                                                         argd['asc'],
                                                         show_nb_views,
                                                         argd['ln'])
    return page(title=_("List of public baskets"),
                body=body,
                navtrail=navtrail,
                uid=uid,
                lastupdated=__lastupdated__,
                language=argd['ln'],
                req=req,
                navmenuid='yourbaskets',
                of=argd['of'],
                navtrail_append_title_p=0,
                secure_page_p=1)
def subscribe(self, req, form):
    """Subscribe the current user to a public basket, then re-display it.

    URL arguments: bskid (basket id, 0 = none), of (output format), ln.
    """
    argd = wash_urlargd(form, {'bskid': (int, 0),
                               'of': (str, 'hb'),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        return page_not_authorized(req, "../yourbaskets/subscribe",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            # Anonymous guests must log in first; redirect to the login
            # page with a referer pointing back to this request.
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/subscribe%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
            text = _("You are not authorized to use baskets."))
    if not argd['bskid']:
        # Nothing to subscribe to: show the list of public baskets instead.
        (body, navtrail) = perform_request_list_public_baskets(uid)
        title = _('List of public baskets')
    else:
        # TODO: Take care of XML output as shown below
        #req.content_type = "text/xml"
        #req.send_http_header()
        #return perform_request_display_public(bskid=argd['bskid'], of=argd['of'], ln=argd['ln'])
        # Perform the subscription, then re-render the public basket with
        # any subscription warnings prepended to the body.
        subscribe_warnings_html = perform_request_subscribe(uid, argd['bskid'], argd['ln'])
        (body, dummy, navtrail) = perform_request_display_public(uid=uid,
                                                                 selected_bskid=argd['bskid'],
                                                                 selected_recid=0,
                                                                 of=argd['of'],
                                                                 ln=argd['ln'])
        #warnings.extend(subscribe_warnings)
        body = subscribe_warnings_html + body
        title = _('Public basket')
    return page(title = title,
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1)
def unsubscribe(self, req, form):
    """Unsubscribe from basket pseudo-interface."""
    argd = wash_urlargd(form, {'bskid': (int, 0),
                               'of': (str, 'hb'),
                               'ln': (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        return page_not_authorized(req, "../yourbaskets/unsubscribe",
                                   navmenuid='yourbaskets')
    # Anonymous guests are bounced through login, returning here afterwards.
    if isGuestUser(uid) and not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
        referer = "%s/yourbaskets/unsubscribe%s" % (
            CFG_SITE_SECURE_URL,
            make_canonical_urlargd(argd, {}))
        login_url = "%s/youraccount/login%s" % (
            CFG_SITE_SECURE_URL,
            make_canonical_urlargd({'referer': referer,
                                    "ln": argd['ln']}, {}))
        return redirect_to_url(req, login_url)
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../",
                                   text=_("You are not authorized to use baskets."))
    if argd['bskid']:
        # TODO: Take care of XML output as shown below
        #req.content_type = "text/xml"
        #req.send_http_header()
        #return perform_request_display_public(bskid=argd['bskid'], of=argd['of'], ln=argd['ln'])
        # Unsubscribe, then re-render the public basket with any warnings
        # prepended to the body.
        unsubscribe_warnings_html = perform_request_unsubscribe(uid, argd['bskid'], argd['ln'])
        body, dummy, navtrail = perform_request_display_public(uid=uid,
                                                               selected_bskid=argd['bskid'],
                                                               selected_recid=0,
                                                               of=argd['of'],
                                                               ln=argd['ln'])
        # warnings.extend(unsubscribe_warnings)
        body = unsubscribe_warnings_html + body
        title = _('Public basket')
    else:
        body, navtrail = perform_request_list_public_baskets(uid)
        title = _('List of public baskets')
    return page(title=title,
                body=body,
                navtrail=navtrail,
                uid=uid,
                lastupdated=__lastupdated__,
                language=argd['ln'],
                req=req,
                navmenuid='yourbaskets',
                of=argd['of'],
                navtrail_append_title_p=0,
                secure_page_p=1)
def write_public_note(self, req, form):
    """Render the interface for writing a note in a public basket."""
    argd = wash_urlargd(form, {'bskid': (int, 0),
                               'recid': (int, 0),
                               'cmtid': (int, 0),
                               'of' : (str, ''),
                               'ln' : (str, CFG_SITE_LANG)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/write_public_note",
                                   navmenuid='yourbaskets')
    # Anonymous guests are sent through login first.
    if isGuestUser(uid) and not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
        referer = "%s/yourbaskets/write_public_note%s" % (
            CFG_SITE_SECURE_URL,
            make_canonical_urlargd(argd, {}))
        return redirect_to_url(req, "%s/youraccount/login%s" % (
            CFG_SITE_SECURE_URL,
            make_canonical_urlargd({'referer': referer,
                                    "ln": argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../",
                                   text=_("You are not authorized to use baskets."))
    body, navtrail = perform_request_write_public_note(uid=uid,
                                                       bskid=argd['bskid'],
                                                       recid=argd['recid'],
                                                       cmtid=argd['cmtid'],
                                                       ln=argd['ln'])
    # register event in webstat
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["write_public_note", basket_str, user_str])
    except:
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title=_("Add a note"),
                body=body,
                navtrail=navtrail,
                uid=uid,
                lastupdated=__lastupdated__,
                language=argd['ln'],
                req=req,
                navmenuid='yourbaskets',
                of=argd['of'],
                secure_page_p=1)
def save_public_note(self, req, form):
    """Save a note written on a record in a public basket.

    URL arguments: bskid, recid, note_title, note_body, editor_type
    (rich/plain), of, ln, reply_to (id of the note replied to).
    """
    # NOTE(review): 'reply_to' is declared as (str, 0) — a str type with an
    # int default; presumably tolerated downstream, but verify against
    # perform_request_save_public_note.
    argd = wash_urlargd(form, {'bskid': (int, 0),
                               'recid': (int, 0),
                               'note_title': (str, ""),
                               'note_body': (str, ""),
                               'editor_type': (str, ""),
                               'of': (str, ''),
                               'ln': (str, CFG_SITE_LANG),
                               'reply_to': (str, 0)})
    _ = gettext_set_language(argd['ln'])
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../yourbaskets/save_public_note",
                                   navmenuid = 'yourbaskets')
    if isGuestUser(uid):
        if not CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
            # Anonymous guests must log in first, then come back here.
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourbaskets/save_public_note%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))
    user_info = collect_user_info(req)
    if not user_info['precached_usebaskets']:
        return page_not_authorized(req, "../", \
            text = _("You are not authorized to use baskets."))
    (body, navtrail) = perform_request_save_public_note(uid=uid,
                                                        bskid=argd['bskid'],
                                                        recid=argd['recid'],
                                                        note_title=argd['note_title'],
                                                        note_body=argd['note_body'],
                                                        editor_type=argd['editor_type'],
                                                        ln=argd['ln'],
                                                        reply_to=argd['reply_to'])
    # TODO: do not stat event if save was not succussful
    # register event in webstat
    basket_str = "%s (%d)" % (get_basket_name(argd['bskid']), argd['bskid'])
    if user_info['email']:
        user_str = "%s (%d)" % (user_info['email'], user_info['uid'])
    else:
        user_str = ""
    try:
        register_customevent("baskets", ["save_public_note", basket_str, user_str])
    except:
        # Stats are best-effort; never break the response.
        register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    return page(title = _("Display item and notes"),
                body = body,
                navtrail = navtrail,
                uid = uid,
                lastupdated = __lastupdated__,
                language = argd['ln'],
                req = req,
                navmenuid = 'yourbaskets',
                of = argd['of'],
                navtrail_append_title_p = 0,
                secure_page_p=1)
diff --git a/invenio/legacy/webcomment/adminlib.py b/invenio/legacy/webcomment/adminlib.py
index 194bcb4d2..9fbcdf400 100644
--- a/invenio/legacy/webcomment/adminlib.py
+++ b/invenio/legacy/webcomment/adminlib.py
@@ -1,690 +1,690 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, \
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN
from invenio.modules.comments.config import InvenioWebCommentWarning
from invenio.modules.comments.api import query_get_comment, \
get_reply_order_cache_data
from invenio.utils.url import wash_url_argument
from invenio.dbquery import run_sql
from invenio.base.i18n import gettext_set_language, wash_language
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webuser import get_user_info, collect_user_info, \
isUserAdmin
from invenio.access_control_engine import acc_authorize_action, \
acc_get_authorized_emails
from invenio.search_engine import perform_request_search
import invenio.template
webcomment_templates = invenio.template.load('webcomment')
def getnavtrail(previous = '', ln=CFG_SITE_LANG):
    """Return the admin-area navtrail HTML with `previous` appended."""
    previous = wash_url_argument(previous, 'str')
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    trail = """<a class="navtrail" href="%s/help/admin">%s</a> """ % (CFG_SITE_URL, _("Admin Area"))
    return trail + previous
def get_nb_reviews(recID, count_deleted=True):
    """
    Return the number of reviews (star_score > 0) attached to record recID.
    Deleted reviews (status 'dm' or 'da') are counted unless
    count_deleted is False.
    """
    query = """SELECT count(*)
               FROM cmtRECORDCOMMENT c
               WHERE c.id_bibrec = %s and c.star_score > 0
            """
    if not count_deleted:
        # The triple-quoted string above ends with whitespace, so direct
        # concatenation keeps the SQL well-formed.
        query += "and c.status != 'dm' and c.status != 'da'"
    return run_sql(query, (recID,))[0][0]
def get_nb_comments(recID, count_deleted=True):
    """
    Return the number of comments (star_score = 0) attached to record recID.
    Deleted comments (status 'dm' or 'da') are counted unless
    count_deleted is False.
    """
    query = """SELECT count(*)
               FROM cmtRECORDCOMMENT c
               WHERE c.id_bibrec = %s and c.star_score = 0
            """
    if not count_deleted:
        # The triple-quoted string above ends with whitespace, so direct
        # concatenation keeps the SQL well-formed.
        query += "and c.status != 'dm' and c.status != 'da'"
    return run_sql(query, (recID,))[0][0]
def get_user_collections(req):
    """
    Return the list of collection names the requesting user may moderate.
    Site admins may moderate every collection.
    """
    user_info = collect_user_info(req)
    moderated = []
    for row in run_sql('SELECT name FROM collection'):
        name = row[0]
        authorized = acc_get_authorized_emails('moderatecomments',
                                               collection=name)
        if user_info['email'] in authorized or isUserAdmin(user_info):
            moderated.append(name)
    return moderated
def perform_request_index(ln=CFG_SITE_LANG):
    """
    Render the WebComment admin landing page in language `ln`.
    """
    return webcomment_templates.tmpl_admin_index(ln=ln)
def perform_request_delete(comID=-1, recID=-1, uid=-1, reviews="", ln=CFG_SITE_LANG):
"""
"""
_ = gettext_set_language(ln)
from invenio.search_engine import record_exists
warnings = []
ln = wash_language(ln)
comID = wash_url_argument(comID, 'int')
recID = wash_url_argument(recID, 'int')
uid = wash_url_argument(uid, 'int')
# parameter reviews is deduced from comID when needed
if comID is not None and recID is not None and uid is not None:
if comID <= 0 and recID <= 0 and uid <= 0:
if comID != -1:
try:
raise InvenioWebCommentWarning(_('Invalid comment ID.'))
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(("WRN_WEBCOMMENT_ADMIN_INVALID_COMID",))
return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
if comID > 0 and not recID > 0:
comment = query_get_comment(comID)
if comment:
# Figure out if this is a review or a comment
c_star_score = 5
if comment[c_star_score] > 0:
reviews = 1
else:
reviews = 0
return (perform_request_comments(ln=ln, comID=comID, recID=recID, reviews=reviews), None, warnings)
else:
try:
raise InvenioWebCommentWarning(_('Comment ID %s does not exist.') % comID)
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(('WRN_WEBCOMMENT_ADMIN_COMID_INEXISTANT', comID))
return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
elif recID > 0:
if record_exists(recID):
comID = ''
reviews = wash_url_argument(reviews, 'int')
return (perform_request_comments(ln=ln, comID=comID, recID=recID, reviews=reviews), None, warnings)
else:
try:
raise InvenioWebCommentWarning(_('Record ID %s does not exist.') % comID)
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(('WRN_WEBCOMMENT_ADMIN_RECID_INEXISTANT', comID))
return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
else:
return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
else:
return webcomment_templates.tmpl_admin_delete_form(ln, warnings)
def perform_request_users(ln=CFG_SITE_LANG):
    """
    Render the admin table of users that have been reported for abuse.
    """
    ln = wash_language(ln)
    reported_users = query_get_users_reported()
    return webcomment_templates.tmpl_admin_users(ln=ln,
                                                 users_data=reported_users)
def query_get_users_reported():
    """
    Get the users who have been reported at least once.
    @return: tuple of ct, i.e. (ct, ct, ...)
    where ct is a tuple (total_number_reported, total_comments_reported, total_reviews_reported,
    total_nb_votes_yes_of_reported, total_nb_votes_total_of_reported, user_id, user_email, user_nickname)
    sorted by order of ct having highest total_number_reported
    """
    query1 = "SELECT c.nb_abuse_reports, c.nb_votes_yes, c.nb_votes_total, u.id, u.email, u.nickname, c.star_score " \
             "FROM user AS u, cmtRECORDCOMMENT AS c " \
             "WHERE c.id_user=u.id AND c.nb_abuse_reports > 0 " \
             "ORDER BY u.id "
    res1 = run_sql(query1)
    # BUGFIX: the original guard was `if type(res1) is None:`, which can
    # never be true (type() always returns a type object).  Guard against
    # an empty/None result set instead.
    if not res1:
        return ()
    users = {}
    for cmt in res1:
        uid = int(cmt[3])
        is_review = int(cmt[6]) > 0
        if uid in users:
            prev = users[uid]
            # BUGFIX: the original used the `cond and a or b` idiom here,
            # which picks `b` whenever `a` is falsy: for a review row with a
            # current comment count of 0 it wrongly incremented the comment
            # count.  Explicit branches avoid the falsy-value pitfall.
            if is_review:
                nb_comments = prev[1]
                nb_reviews = prev[2] + 1
            else:
                nb_comments = prev[1] + 1
                nb_reviews = prev[2]
            users[uid] = (prev[0] + int(cmt[0]), nb_comments, nb_reviews,
                          prev[3] + int(cmt[1]), prev[4] + int(cmt[2]),
                          int(cmt[3]), cmt[4], cmt[5])
        else:
            if is_review:
                nb_comments = 0
                nb_reviews = 1
            else:
                nb_comments = 1
                nb_reviews = 0
            users[uid] = (int(cmt[0]), nb_comments, nb_reviews,
                          int(cmt[1]), int(cmt[2]), int(cmt[3]),
                          cmt[4], cmt[5])
    users = users.values()
    users.sort()
    users.reverse()
    return tuple(users)
def perform_request_comments(req=None, ln=CFG_SITE_LANG, uid="", comID="", recID="", reviews=0, abuse=False, collection=""):
    """
    Display the list of comments/reviews along with information about the comment.
    Display the comment given by its ID, or the list of comments for
    the given record ID.
    If abuse == True, only list records reported as abuse.
    If comID and recID are not provided, list all comments, or all
    abused comments (check parameter 'abuse')

    The template is rendered with error=1 when the moderation
    authorization fails, error=0 on success, error=2 when no comments
    match.
    """
    ln = wash_language(ln)
    uid = wash_url_argument(uid, 'int')
    comID = wash_url_argument(comID, 'int')
    recID = wash_url_argument(recID, 'int')
    reviews = wash_url_argument(reviews, 'int')
    collection = wash_url_argument(collection, 'str')
    user_info = collect_user_info(req)  # NOTE(review): unused below
    user_collections = ['Show all']
    user_collections.extend(get_user_collections(req))
    if collection and collection != 'Show all':
        # NOTE(review): acc_authorize_action is passed `req` here, whereas
        # other call sites pass a user_info dict — confirm both are accepted.
        (auth_code, auth_msg) = acc_authorize_action(req, 'moderatecomments', collection=collection)
        if auth_code:
            # Not authorized to moderate this collection.
            return webcomment_templates.tmpl_admin_comments(ln=ln, uid=uid,
                                                            comID=comID,
                                                            recID=recID,
                                                            comment_data=None,
                                                            reviews=reviews,
                                                            error=1,
                                                            user_collections=user_collections,
                                                            collection=collection)
    # Fall back to the first allowed collection ('Show all') when no
    # explicit collection was requested; ignore comID-only filters unless a
    # record or user filter is also present.
    if collection:
        if recID or uid:
            comments = query_get_comments(uid, comID, recID, reviews, ln, abuse=abuse, user_collections=user_collections, collection=collection)
        else:
            comments = query_get_comments('', comID, '', reviews, ln, abuse=abuse, user_collections=user_collections, collection=collection)
    else:
        if recID or uid:
            comments = query_get_comments(uid, comID, recID, reviews, ln, abuse=abuse, user_collections=user_collections, collection=user_collections[0])
        else:
            comments = query_get_comments('', comID, '', reviews, ln, abuse=abuse, user_collections=user_collections, collection=user_collections[0])
    if comments:
        return webcomment_templates.tmpl_admin_comments(ln=ln, uid=uid,
                                                        comID=comID,
                                                        recID=recID,
                                                        comment_data=comments,
                                                        reviews=reviews,
                                                        error=0,
                                                        user_collections=user_collections,
                                                        collection=collection)
    else:
        return webcomment_templates.tmpl_admin_comments(ln=ln, uid=uid,
                                                        comID=comID,
                                                        recID=recID,
                                                        comment_data=comments,
                                                        reviews=reviews,
                                                        error=2,
                                                        user_collections=user_collections,
                                                        collection=collection)
def perform_request_hot(req=None, ln=CFG_SITE_LANG, comments=1, top=10, collection="Show all"):
    """
    Display the list of hottest comments/reviews along with information about the comment.
    @param req: request object for obtaining user information
    @param ln: language
    @param comments: boolean activated if using comments, deactivated for reviews
    @param top: specify number of results to be shown
    @param collection: filter by collection

    The template is rendered with error=1 when the moderation
    authorization fails, error=0 on success, error=2 when nothing matches.
    """
    ln = wash_language(ln)
    comments = wash_url_argument(comments, 'int')
    top = wash_url_argument(top, 'int')
    collection = wash_url_argument(collection, 'str')
    user_info = collect_user_info(req)  # NOTE(review): unused below
    user_collections = ['Show all']
    user_collections.extend(get_user_collections(req))
    if collection and collection != 'Show all':
        # NOTE(review): acc_authorize_action is passed `req` here, whereas
        # other call sites pass a user_info dict — confirm both are accepted.
        (auth_code, auth_msg) = acc_authorize_action(req, 'moderatecomments', collection=collection)
        if auth_code:
            return webcomment_templates.tmpl_admin_hot(ln=ln,
                                                       comment_data = None,
                                                       comments=comments, error=1, user_collections=user_collections, collection=collection)
    # Default to the first allowed collection ('Show all') when none given.
    if collection:
        comments_retrieved = query_get_hot(comments, ln, top, user_collections, collection)
    else:
        comments_retrieved = query_get_hot(comments, ln, top, user_collections, user_collections[0])
    if comments_retrieved:
        return webcomment_templates.tmpl_admin_hot(ln=ln,
                                                   comment_data=comments_retrieved,
                                                   comments=comments, error=0, user_collections=user_collections, collection=collection)
    else:
        return webcomment_templates.tmpl_admin_hot(ln=ln,
                                                   comment_data=comments_retrieved,
                                                   comments=comments, error=2, user_collections=user_collections, collection=collection)
def perform_request_latest(req=None, ln=CFG_SITE_LANG, comments=1, top=10, collection=""):
    """
    Display the list of latest comments/reviews along with information about the comment.
    @param req: request object for obtaining user information
    @param ln: language
    @param comments: boolean activated if using comments, deactivated for reviews
    @param top: Specify number of results to be shown
    @param collection: filter by collection

    The template is rendered with error=1 when the moderation
    authorization fails, error=0 on success, error=2 when nothing matches.
    """
    ln = wash_language(ln)
    comments = wash_url_argument(comments, 'int')
    top = wash_url_argument(top, 'int')
    collection = wash_url_argument(collection, 'str')
    user_info = collect_user_info(req)  # NOTE(review): unused below
    user_collections = ['Show all']
    user_collections.extend(get_user_collections(req))
    if collection and collection != 'Show all':
        # NOTE(review): acc_authorize_action is passed `req` here, whereas
        # other call sites pass a user_info dict — confirm both are accepted.
        (auth_code, auth_msg) = acc_authorize_action(req, 'moderatecomments', collection=collection)
        if auth_code:
            return webcomment_templates.tmpl_admin_latest(ln=ln,
                                                          comment_data=None,
                                                          comments=comments, error=1, user_collections=user_collections, collection=collection)
    # Default to the first allowed collection ('Show all') when none given.
    if collection:
        comments_retrieved = query_get_latest(comments, ln, top, user_collections, collection)
    else:
        comments_retrieved = query_get_latest(comments, ln, top, user_collections, user_collections[0])
    if comments_retrieved:
        return webcomment_templates.tmpl_admin_latest(ln=ln,
                                                      comment_data=comments_retrieved,
                                                      comments=comments, error=0, user_collections=user_collections, collection=collection)
    else:
        return webcomment_templates.tmpl_admin_latest(ln=ln,
                                                      comment_data=comments_retrieved,
                                                      comments=comments, error=2, user_collections=user_collections, collection=collection)
def perform_request_undel_single_com(ln=CFG_SITE_LANG, id=id):
    """
    Mark the comment referenced by `id` as active ('ok') again.

    NOTE(review): the default `id=id` binds the *builtin* function `id`,
    which would make wash_url_argument fail if the caller ever omitted the
    argument — callers are expected to always pass a comment id.
    @return: 1 on success, 0 otherwise (see query_undel_single_comment)
    """
    ln = wash_language(ln)  # washed for consistency, though unused below
    id = wash_url_argument(id, 'int')
    return query_undel_single_comment(id)
def query_get_comments(uid, cmtID, recID, reviews, ln, abuse=False, user_collections='', collection=''):
    """
    private function
    @param user_collections: allowed collections for the user
    @param collection: collection to display
    @return tuple of comment where comment is
    tuple (nickname, uid, date_creation, body, id, status) if ranking disabled or
    tuple (nickname, uid, date_creation, body, nb_votes_yes, nb_votes_total, star_score, title, id, status)

    Each returned element is actually (comment_tuple, general_infos_tuple);
    rows are post-filtered to records belonging to `collection`.
    """
    # Column-name -> index map for the SELECT below; the review-only columns
    # (nb_votes_*, star_score, title) are present only when `reviews` is
    # true, so email/nickname are addressed from the end (-2/-1).
    qdict = {'id': 0, 'id_bibrec': 1, 'uid': 2, 'date_creation': 3, 'body': 4,
             'status': 5, 'nb_abuse_reports': 6, 'nb_votes_yes': 7, 'nb_votes_total': 8,
             'star_score': 9, 'title': 10, 'email': -2, 'nickname': -1}
    query = """SELECT c.id, c.id_bibrec, c.id_user,
               DATE_FORMAT(c.date_creation, '%%Y-%%m-%%d %%H:%%i:%%S'), c.body,
               c.status, c.nb_abuse_reports,
               %s
               u.email, u.nickname
               FROM cmtRECORDCOMMENT c LEFT JOIN user u
               ON c.id_user = u.id
               %s
               ORDER BY c.nb_abuse_reports DESC, c.nb_votes_yes DESC, c.date_creation
    """
    select_fields = reviews and 'c.nb_votes_yes, c.nb_votes_total, c.star_score, c.title,' or ''
    where_clause = "WHERE " + (reviews and 'c.star_score>0' or 'c.star_score=0')
    # The %i interpolations below are safe only because all values have been
    # washed to int by the callers.
    if uid:
        where_clause += ' AND c.id_user=%i' % uid
    if recID:
        where_clause += ' AND c.id_bibrec=%i' % recID
    if cmtID:
        where_clause += ' AND c.id=%i' % cmtID
    if abuse:
        where_clause += ' AND c.nb_abuse_reports>0'
    res = run_sql(query % (select_fields, where_clause))
    # Restrict output to records visible in the requested collection(s).
    collection_records = []
    if collection == 'Show all':
        for collection_name in user_collections:
            collection_records.extend(perform_request_search(cc=collection_name))
    else:
        collection_records.extend(perform_request_search(cc=collection))
    output = []
    for qtuple in res:
        if qtuple[qdict['id_bibrec']] in collection_records:
            # Fall back to the stored user info when no nickname is set.
            nickname = qtuple[qdict['nickname']] or get_user_info(qtuple[qdict['uid']], ln)[2]
            if reviews:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['nb_votes_yes']],
                                 qtuple[qdict['nb_votes_total']],
                                 qtuple[qdict['star_score']],
                                 qtuple[qdict['title']],
                                 qtuple[qdict['id']],
                                 qtuple[qdict['status']])
            else:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['id']],
                                 qtuple[qdict['status']])
            general_infos_tuple = (nickname,
                                   qtuple[qdict['uid']],
                                   qtuple[qdict['email']],
                                   qtuple[qdict['id']],
                                   qtuple[qdict['id_bibrec']],
                                   qtuple[qdict['nb_abuse_reports']])
            out_tuple = (comment_tuple, general_infos_tuple)
            output.append(out_tuple)
    return tuple(output)
def query_get_hot(comments, ln, top, user_collections, collection):
    """
    private function
    @param comments: boolean indicating if we want to retrieve comments or reviews
    @param ln: language
    @param top: number of results to display
    @param user_collections: allowed collections for the user
    @param collection: collection to display
    @return: tuple (id_bibrec, date_last_comment, users, count)
    """
    qdict = {'id_bibrec': 0, 'date_last_comment': 1, 'users': 2, 'total_count': 3}
    query = """SELECT c.id_bibrec,
               DATE_FORMAT(max(c.date_creation), '%%Y-%%m-%%d %%H:%%i:%%S') as date_last_comment,
               count(distinct c.id_user) as users,
               count(*) as count
               FROM cmtRECORDCOMMENT c
               %s
               GROUP BY c.id_bibrec
               ORDER BY count(*) DESC
               LIMIT %s
    """
    # Only approved ('ok') items below the abuse-report threshold count as
    # "hot".  The threshold is a config int, so %s interpolation is safe.
    where_clause = "WHERE " + (comments and 'c.star_score=0' or 'c.star_score>0') + ' AND c.status="ok" AND c.nb_abuse_reports < %s' % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN
    res = run_sql(query % (where_clause, top))
    # Restrict output to records visible in the requested collection(s).
    collection_records = []
    if collection == 'Show all':
        for collection_name in user_collections:
            collection_records.extend(perform_request_search(cc=collection_name))
    else:
        collection_records.extend(perform_request_search(cc=collection))
    output = []
    for qtuple in res:
        if qtuple[qdict['id_bibrec']] in collection_records:
            general_infos_tuple = (qtuple[qdict['id_bibrec']],
                                   qtuple[qdict['date_last_comment']],
                                   qtuple[qdict['users']],
                                   qtuple[qdict['total_count']])
            output.append(general_infos_tuple)
    return tuple(output)
def query_get_latest(comments, ln, top, user_collections, collection):
    """
    private function
    @param comments: boolean indicating if we want to retrieve comments or reviews
    @param ln: language
    @param top: number of results to display
    @param user_collections: allowed collections for the user
    @param collection: collection to display
    @return tuple of comment where comment is
    tuple (nickname, uid, date_creation, body, id) if latest comments or
    tuple (nickname, uid, date_creation, body, star_score, id) if latest reviews

    Each returned element is actually (comment_tuple, general_infos_tuple).
    """
    # star_score is present only for reviews, so nickname is addressed
    # from the end (-1).
    qdict = {'id': 0, 'id_bibrec': 1, 'uid': 2, 'date_creation': 3, 'body': 4,
             'nb_abuse_reports': 5, 'star_score': 6, 'nickname': -1}
    query = """SELECT c.id, c.id_bibrec, c.id_user,
               DATE_FORMAT(c.date_creation, '%%Y-%%m-%%d %%H:%%i:%%S'), c.body,
               c.nb_abuse_reports,
               %s
               u.nickname
               FROM cmtRECORDCOMMENT c LEFT JOIN user u
               ON c.id_user = u.id
               %s
               ORDER BY c.date_creation DESC
               LIMIT %s
    """
    select_fields = not comments and 'c.star_score, ' or ''
    # Only approved ('ok') items below the abuse-report threshold are shown.
    where_clause = "WHERE " + (comments and 'c.star_score=0' or 'c.star_score>0') + ' AND c.status="ok" AND c.nb_abuse_reports < %s' % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN
    res = run_sql(query % (select_fields, where_clause, top))
    # Restrict output to records visible in the requested collection(s).
    collection_records = []
    if collection == 'Show all':
        for collection_name in user_collections:
            collection_records.extend(perform_request_search(cc=collection_name))
    else:
        collection_records.extend(perform_request_search(cc=collection))
    output = []
    for qtuple in res:
        if qtuple[qdict['id_bibrec']] in collection_records:
            # Fall back to the stored user info when no nickname is set.
            nickname = qtuple[qdict['nickname']] or get_user_info(qtuple[qdict['uid']], ln)[2]
            if not comments:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['star_score']],
                                 qtuple[qdict['id']])
            else:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['id']])
            general_infos_tuple = (nickname,
                                   qtuple[qdict['uid']],
                                   qtuple[qdict['id']],
                                   qtuple[qdict['id_bibrec']],
                                   qtuple[qdict['nb_abuse_reports']])
            out_tuple = (comment_tuple, general_infos_tuple)
            output.append(out_tuple)
    return tuple(output)
def perform_request_del_com(ln=CFG_SITE_LANG, comIDs=[]):
    """
    private function
    Delete the comments and say whether successful or not
    @param ln: language
    @param comIDs: list of comment ids

    NOTE(review): the mutable default `comIDs=[]` is shared across calls,
    but is rebound (never mutated) here, so it is harmless in practice.
    """
    ln = wash_language(ln)
    comIDs = wash_url_argument(comIDs, 'list')
    # map ( fct, list, arguments of function)
    comIDs = map(wash_url_argument, comIDs, ('int '*len(comIDs)).split(' ')[:-1])
    if not comIDs:
        # Empty input: the map below is a no-op on an empty list; render the
        # deletion result template with an empty result set.
        comIDs = map(coerce, comIDs, ('0 '*len(comIDs)).split(' ')[:-1])
        return webcomment_templates.tmpl_admin_del_com(del_res=comIDs, ln=ln)
    del_res = []
    for comID in comIDs:
        # (comment id, 1-on-success / 0-on-failure)
        del_res.append((comID, query_delete_comment_mod(comID)))
    return webcomment_templates.tmpl_admin_del_com(del_res=del_res, ln=ln)
def perform_request_undel_com(ln=CFG_SITE_LANG, comIDs=[]):
    """
    private function
    Undelete the comments and say whether successful or not
    @param ln: language
    @param comIDs: list of comment ids

    NOTE(review): the mutable default `comIDs=[]` is shared across calls,
    but is rebound (never mutated) here, so it is harmless in practice.
    """
    ln = wash_language(ln)
    comIDs = wash_url_argument(comIDs, 'list')
    # map ( fct, list, arguments of function)
    comIDs = map(wash_url_argument, comIDs, ('int '*len(comIDs)).split(' ')[:-1])
    if not comIDs:
        # Empty input: the map below is a no-op on an empty list; render the
        # result template with an empty result set.
        comIDs = map(coerce, comIDs, ('0 '*len(comIDs)).split(' ')[:-1])
        return webcomment_templates.tmpl_admin_undel_com(del_res=comIDs, ln=ln)
    del_res = []
    for comID in comIDs:
        # (comment id, 1-on-success / 0-on-failure)
        del_res.append((comID, query_undel_single_comment(comID)))
    return webcomment_templates.tmpl_admin_undel_com(del_res=del_res, ln=ln)
def perform_request_del_single_com_mod(ln=CFG_SITE_LANG, id=id):
    """
    private function
    Delete a single comment requested by a moderator
    @param ln: language
    @param id: comment id to be deleted

    NOTE(review): the default `id=id` binds the *builtin* function `id`;
    callers are expected to always pass a comment id.
    """
    ln = wash_language(ln)  # washed for consistency, though unused below
    id = wash_url_argument(id, 'int')
    return query_delete_comment_mod(id)
def perform_request_del_single_com_auth(ln=CFG_SITE_LANG, id=id):
    """
    private function
    Delete a single comment requested by the author
    @param ln: language
    @param id: comment id to be deleted

    NOTE(review): the default `id=id` binds the *builtin* function `id`;
    callers are expected to always pass a comment id.
    """
    ln = wash_language(ln)  # washed for consistency, though unused below
    id = wash_url_argument(id, 'int')
    return query_delete_comment_auth(id)
def perform_request_unreport_single_com(ln=CFG_SITE_LANG, id=""):
    """
    private function
    Clear the abuse reports of a single comment.
    @param ln: language
    @param id: id of the comment to unreport
    @return: 1 on success, 0 otherwise
    """
    ln = wash_language(ln)
    comment_id = wash_url_argument(id, 'int')
    return query_suppress_abuse_report(comment_id)
def suppress_abuse_report(ln=CFG_SITE_LANG, comIDs=[]):
    """
    private function
    suppress the abuse reports for the given comIDs.
    @param ln: language
    @param comIDs: list of ids to suppress attached reports.

    NOTE(review): on empty input this renders tmpl_admin_del_com while the
    success path renders tmpl_admin_suppress_abuse_report — looks like a
    copy-paste from perform_request_del_com; confirm before relying on it.
    """
    ln = wash_language(ln)
    comIDs = wash_url_argument(comIDs, 'list')
    # map ( fct, list, arguments of function)
    comIDs = map(wash_url_argument, comIDs, ('int '*len(comIDs)).split(' ')[:-1])
    if not comIDs:
        # Empty input: the map below is a no-op on an empty list.
        comIDs = map(coerce, comIDs, ('0 '*len(comIDs)).split(' ')[:-1])
        return webcomment_templates.tmpl_admin_del_com(del_res=comIDs, ln=ln)
    del_res = []
    for comID in comIDs:
        # (comment id, 1-on-success / 0-on-failure)
        del_res.append((comID, query_suppress_abuse_report(comID)))
    return webcomment_templates.tmpl_admin_suppress_abuse_report(del_res=del_res, ln=ln)
def query_suppress_abuse_report(comID):
    """Reset the abuse counter of comment comID and mark it approved ('ap').
    @return: integer 1 if successful, integer 0 if not
    """
    affected = run_sql(
        "UPDATE cmtRECORDCOMMENT SET nb_abuse_reports=0, status='ap' WHERE id=%s",
        (comID,))
    return int(affected)
def query_delete_comment_mod(comID):
    """
    Mark comment comID as deleted by a moderator (status 'dm').
    @return: integer 1 if successful, integer 0 if not
    """
    affected = run_sql("UPDATE cmtRECORDCOMMENT SET status='dm' WHERE id=%s",
                       (comID,))
    return int(affected)
def query_delete_comment_auth(comID):
    """
    Mark comment comID as deleted by its author (status 'da').
    @return: integer 1 if successful, integer 0 if not
    """
    affected = run_sql("UPDATE cmtRECORDCOMMENT SET status='da' WHERE id=%s",
                       (comID,))
    return int(affected)
def query_undel_single_comment(comID):
    """
    Restore comment comID to the active state (status 'ok').
    @return: integer 1 if successful, integer 0 if not
    """
    affected = run_sql("UPDATE cmtRECORDCOMMENT SET status='ok' WHERE id=%s",
                       (comID,))
    return int(affected)
def check_user_is_author(user_id, com_id):
    """Return 1 if user user_id authored comment com_id, else 0."""
    rows = run_sql("SELECT id, id_user FROM cmtRECORDCOMMENT WHERE id=%s and id_user=%s",
                   (str(com_id), str(user_id)))
    return rows and 1 or 0
def migrate_comments_populate_threads_index():
    """
    Fill in the `reply_order_cached_data' columns in cmtRECORDCOMMENT and
    bskRECORDCOMMENT tables with adequate values so that thread
    are displayed correctly.
    """
    # Update WebComment comments
    res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE reply_order_cached_data is NULL")
    for row in res:
        reply_order_cached_data = get_reply_order_cache_data(row[0])
        run_sql("UPDATE cmtRECORDCOMMENT set reply_order_cached_data=%s WHERE id=%s",
                (reply_order_cached_data, row[0]))
    # Update WebBasket comments
    res = run_sql("SELECT id FROM bskRECORDCOMMENT WHERE reply_order_cached_data is NULL")
    for row in res:
        reply_order_cached_data = get_reply_order_cache_data(row[0])
        # BUGFIX: this UPDATE previously targeted cmtRECORDCOMMENT, so the
        # WebBasket rows selected just above were never filled in and
        # unrelated WebComment rows with the same ids were overwritten.
        run_sql("UPDATE bskRECORDCOMMENT set reply_order_cached_data=%s WHERE id=%s",
                (reply_order_cached_data, row[0]))
diff --git a/invenio/legacy/webcomment/webinterface.py b/invenio/legacy/webcomment/webinterface.py
index 85070c8c4..273a42e1f 100644
--- a/invenio/legacy/webcomment/webinterface.py
+++ b/invenio/legacy/webcomment/webinterface.py
@@ -1,930 +1,930 @@
# -*- coding: utf-8 -*-
## Comments and reviews for records.
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" Comments and reviews for records: web interface """
__lastupdated__ = """$Date$"""
__revision__ = """$Id$"""
import cgi
from invenio.modules.comments.api import check_recID_is_in_range, \
perform_request_display_comments_or_remarks, \
perform_request_add_comment_or_remark, \
perform_request_vote, \
perform_request_report, \
subscribe_user_to_discussion, \
unsubscribe_user_from_discussion, \
get_user_subscription_to_discussion, \
check_user_can_attach_file_to_comments, \
check_user_can_view_comments, \
check_user_can_send_comments, \
check_user_can_view_comment, \
query_get_comment, \
toggle_comment_visibility, \
check_comment_belongs_to_record, \
is_comment_deleted, \
perform_display_your_comments
from invenio.config import \
CFG_TMPDIR, \
CFG_SITE_LANG, \
CFG_SITE_URL, \
CFG_SITE_SECURE_URL, \
CFG_PREFIX, \
CFG_SITE_NAME, \
CFG_SITE_NAME_INTL, \
CFG_WEBCOMMENT_ALLOW_COMMENTS,\
CFG_WEBCOMMENT_ALLOW_REVIEWS, \
CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS, \
CFG_SITE_RECORD, \
CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE, \
CFG_WEBCOMMENT_MAX_ATTACHED_FILES, \
CFG_ACCESS_CONTROL_LEVEL_SITE
from invenio.webuser import getUid, page_not_authorized, isGuestUser, collect_user_info
from invenio.webpage import page, pageheaderonly, pagefooteronly
from invenio.search_engine import create_navtrail_links, \
guess_primary_collection_of_a_record, \
get_colID
from invenio.utils.url import redirect_to_url, \
make_canonical_urlargd
from invenio.htmlutils import get_mathjax_header
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.base.i18n import gettext_set_language
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.websearchadminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts
from invenio.access_control_config import VIEWRESTRCOLL
from invenio.access_control_mailcookie import \
mail_cookie_create_authorize_action, \
mail_cookie_create_common, \
mail_cookie_check_common, \
InvenioWebAccessMailCookieDeletedError, \
InvenioWebAccessMailCookieError
from invenio.modules.comments.config import \
InvenioWebCommentError, \
InvenioWebCommentWarning
import invenio.template
webstyle_templates = invenio.template.load('webstyle')
websearch_templates = invenio.template.load('websearch')
import os
from invenio.utils import apache
from invenio.bibdocfile import \
stream_file, \
decompose_file, \
propose_next_docname
class WebInterfaceCommentsPages(WebInterfaceDirectory):
    """Defines the set of /comments pages."""

    _exports = ['', 'display', 'add', 'vote', 'report', 'index', 'attachments',
                'subscribe', 'unsubscribe', 'toggle']

    def __init__(self, recid=-1, reviews=0):
        # recid > 0 refers to a record; per display()'s docstring,
        # recid < -99 refers to a basket discussion.
        self.recid = recid
        self.discussion = reviews # 0:comments, 1:reviews
        # Sub-directory handling attached files for this discussion.
        self.attachments = WebInterfaceCommentsFiles(recid, reviews)

    def index(self, req, form):
        """
        Redirects to display function
        """
        return self.display(req, form)
    def display(self, req, form):
        """
        Display comments (reviews if enabled) associated with record having id recid where recid>0.
        This function can also be used to display remarks associated with basket having id recid where recid<-99.
        @param ln: language
        @param recid: record id, integer
        @param do: display order    hh = highest helpful score, review only
                                    lh = lowest helpful score, review only
                                    hs = highest star score, review only
                                    ls = lowest star score, review only
                                    od = oldest date
                                    nd = newest date
        @param ds: display since    all= no filtering by date
                                    nd = n days ago
                                    nw = n weeks ago
                                    nm = n months ago
                                    ny = n years ago
                                    where n is a single digit integer between 0 and 9
        @param nb: number of results per page
        @param p: results page
        @param voted: boolean, active if user voted for a review, see vote function
        @param reported: int, active if user reported a certain comment/review, see report function
        @param reviews: boolean, enabled for reviews, disabled for comments
        @param subscribed: int, 1 if user just subscribed to discussion, -1 if unsubscribed
        @return the full html page.
        """
        argd = wash_urlargd(form, {'do': (str, "od"),
                                   'ds': (str, "all"),
                                   'nb': (int, 100),
                                   'p': (int, 1),
                                   'voted': (int, -1),
                                   'reported': (int, -1),
                                   'subscribed': (int, 0),
                                   'cmtgrp': (list, ["latest"]) # 'latest' is now a reserved group/round name
                                   })
        _ = gettext_set_language(argd['ln'])
        uid = getUid(req)
        user_info = collect_user_info(req)
        # Access control: guests hitting a restricted record are bounced to
        # login with a mail-cookie that re-authorizes the collection.
        (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid)
        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)
        can_send_comments = False
        (auth_code, auth_msg) = check_user_can_send_comments(user_info, self.recid)
        if not auth_code:
            can_send_comments = True
        # Guests are never allowed to attach files, even when authorized.
        can_attach_files = False
        (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
        if not auth_code and (user_info['email'] != 'guest'):
            can_attach_files = True
        # Subscription status: 1 = subscribed and may unsubscribe,
        # 2 = subscribed without the option to unsubscribe, other = not subscribed.
        subscription = get_user_subscription_to_discussion(self.recid, uid)
        if subscription == 1:
            user_is_subscribed_to_discussion = True
            user_can_unsubscribe_from_discussion = True
        elif subscription == 2:
            user_is_subscribed_to_discussion = True
            user_can_unsubscribe_from_discussion = False
        else:
            user_is_subscribed_to_discussion = False
            user_can_unsubscribe_from_discussion = False
        # Build the detailed-record tab bar, ordered by the 'order' value.
        unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(self.recid)),
                                                self.recid,
                                                ln=argd['ln'])
        ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
        ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
        link_ln = ''
        if argd['ln'] != CFG_SITE_LANG:
            link_ln = '?ln=%s' % argd['ln']
        tabs = [(unordered_tabs[tab_id]['label'], \
                 '%s/record/%s/%s%s' % (CFG_SITE_URL, self.recid, tab_id, link_ln), \
                 tab_id in ['comments', 'reviews'],
                 unordered_tabs[tab_id]['enabled']) \
                for (tab_id, order) in ordered_tabs_id
                if unordered_tabs[tab_id]['visible'] == True]
        tabs_counts = get_detailed_page_tabs_counts(self.recid)
        citedbynum = tabs_counts['Citations']
        references = tabs_counts['References']
        discussions = tabs_counts['Discussions']
        top = webstyle_templates.detailed_record_container_top(self.recid,
                                                               tabs,
                                                               argd['ln'],
                                                               citationnum=citedbynum,
                                                               referencenum=references,
                                                               discussionnum=discussions)
        bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                     tabs,
                                                                     argd['ln'])
        #display_comment_rounds = [cmtgrp for cmtgrp in argd['cmtgrp'] if cmtgrp.isdigit() or cmtgrp == "all" or cmtgrp == "-1"]
        display_comment_rounds = argd['cmtgrp']
        check_warnings = []
        (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln'])
        if ok:
            body = perform_request_display_comments_or_remarks(req=req, recID=self.recid,
                display_order=argd['do'],
                display_since=argd['ds'],
                nb_per_page=argd['nb'],
                page=argd['p'],
                ln=argd['ln'],
                voted=argd['voted'],
                reported=argd['reported'],
                subscribed=argd['subscribed'],
                reviews=self.discussion,
                uid=uid,
                can_send_comments=can_send_comments,
                can_attach_files=can_attach_files,
                user_is_subscribed_to_discussion=user_is_subscribed_to_discussion,
                user_can_unsubscribe_from_discussion=user_can_unsubscribe_from_discussion,
                display_comment_rounds=display_comment_rounds
                )
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])
            navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln'])
            if navtrail:
                navtrail += ' &gt; '
            navtrail += '<a class="navtrail" href="%s/%s/%s?ln=%s">'% (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, argd['ln'])
            navtrail += cgi.escape(title)
            navtrail += '</a>'
            navtrail += ' &gt; <a class="navtrail">%s</a>' % (self.discussion==1 and _("Reviews") or _("Comments"))
            mathjaxheader = ''
            if CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS:
                mathjaxheader = get_mathjax_header(req.is_https())
            jqueryheader = '''
            <script src="%(CFG_SITE_URL)s/js/jquery.MultiFile.pack.js" type="text/javascript" language="javascript"></script>
            ''' % {'CFG_SITE_URL': CFG_SITE_URL}
            # Page is assembled from header + search-page shell + record
            # container around the comments body + footer.
            return pageheaderonly(title=title,
                                  navtrail=navtrail,
                                  uid=uid,
                                  verbose=1,
                                  metaheaderadd = mathjaxheader + jqueryheader,
                                  req=req,
                                  language=argd['ln'],
                                  navmenuid='search',
                                  navtrail_append_title_p=0) + \
                websearch_templates.tmpl_search_pagestart(argd['ln']) + \
                top + body + bottom + \
                websearch_templates.tmpl_search_pageend(argd['ln']) + \
                pagefooteronly(lastupdated=__lastupdated__, language=argd['ln'], req=req)
        else:
            return page(title=_("Record Not Found"),
                        body=problem,
                        uid=uid,
                        verbose=1,
                        req=req,
                        language=argd['ln'],
                        navmenuid='search')

    # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/
    __call__ = index
def add(self, req, form):
"""
Add a comment (review) to record with id recid where recid>0
Also works for adding a remark to basket with id recid where recid<-99
@param ln: languange
@param recid: record id
@param action: 'DISPLAY' to display add form
'SUBMIT' to submit comment once form is filled
'REPLY' to reply to an already existing comment
@param msg: the body of the comment/review or remark
@param score: star score of the review
@param note: title of the review
@param comid: comment id, needed for replying
@param editor_type: the type of editor used for submitting the
comment: 'textarea', 'ckeditor'.
@param subscribe: if set, subscribe user to receive email
notifications when new comment are added to
this discussion
@return the full html page.
"""
argd = wash_urlargd(form, {'action': (str, "DISPLAY"),
'msg': (str, ""),
'note': (str, ''),
'score': (int, 0),
'comid': (int, 0),
'editor_type': (str, ""),
'subscribe': (str, ""),
'cookie': (str, "")
})
_ = gettext_set_language(argd['ln'])
actions = ['DISPLAY', 'REPLY', 'SUBMIT']
uid = getUid(req)
# Is site ready to accept comments?
if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
return page_not_authorized(req, "../comments/add",
navmenuid='search')
# Is user allowed to post comment?
user_info = collect_user_info(req)
(auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
(auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
if isGuestUser(uid):
cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
# Save user's value in cookie, so that these "POST"
# parameters are not lost during login process
msg_cookie = mail_cookie_create_common('comment_msg',
{'msg': argd['msg'],
'note': argd['note'],
'score': argd['score'],
'editor_type': argd['editor_type'],
'subscribe': argd['subscribe']},
onetime=True)
target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
CFG_SITE_SECURE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {})
return redirect_to_url(req, target, norobot=True)
elif (auth_code_1 or auth_code_2):
return page_not_authorized(req, "../", \
text = auth_msg_1 + auth_msg_2)
if argd['comid']:
# If replying to a comment, are we on a record that
# matches the original comment user is replying to?
if not check_comment_belongs_to_record(argd['comid'], self.recid):
return page_not_authorized(req, "../", \
text = _("Specified comment does not belong to this record"))
# Is user trying to reply to a restricted comment? Make
# sure user has access to it. We will then inherit its
# restriction for the new comment
(auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
if auth_code:
return page_not_authorized(req, "../", \
text = _("You do not have access to the specified comment"))
# Is user trying to reply to a deleted comment? If so, we
# let submitted comment go (to not lose possibly submitted
# content, if comment is submitted while original is
# deleted), but we "reset" comid to make sure that for
# action 'REPLY' the original comment is not included in
# the reply
if is_comment_deleted(argd['comid']):
argd['comid'] = 0
user_info = collect_user_info(req)
can_attach_files = False
(auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
if not auth_code and (user_info['email'] != 'guest'):
can_attach_files = True
warning_msgs = [] # list of warning tuples (warning_text, warning_color)
added_files = {}
if can_attach_files:
# User is allowed to attach files. Process the files
file_too_big = False
formfields = form.get('commentattachment[]', [])
if not hasattr(formfields, "__getitem__"): # A single file was uploaded
formfields = [formfields]
for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
if hasattr(formfield, "filename") and formfield.filename:
filename = formfield.filename
dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid))
try:
assert(dir_to_open.startswith(CFG_TMPDIR))
except AssertionError:
register_exception(req=req,
prefix='User #%s tried to upload file to forbidden location: %s' \
% (uid, dir_to_open))
if not os.path.exists(dir_to_open):
try:
os.makedirs(dir_to_open)
except:
register_exception(req=req, alert_admin=True)
## Before saving the file to disc, wash the filename (in particular
## washing away UNIX and Windows (e.g. DFS) paths):
filename = os.path.basename(filename.split('\\')[-1])
filename = filename.strip()
if filename != "":
# Check that file does not already exist
n = 1
while os.path.exists(os.path.join(dir_to_open, filename)):
basedir, name, extension = decompose_file(filename)
new_name = propose_next_docname(name)
filename = new_name + extension
fp = open(os.path.join(dir_to_open, filename), "w")
# FIXME: temporary, waiting for wsgi handler to be
# fixed. Once done, read chunk by chunk
## while formfield.file:
## fp.write(formfield.file.read(10240))
fp.write(formfield.file.read())
fp.close()
# Isn't this file too big?
file_size = os.path.getsize(os.path.join(dir_to_open, filename))
if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \
file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
os.remove(os.path.join(dir_to_open, filename))
# One file is too big: record that,
# dismiss all uploaded files and re-ask to
# upload again
file_too_big = True
try:
raise InvenioWebCommentWarning(_('The size of file \\"%s\\" (%s) is larger than maximum allowed file size (%s). Select files again.') % (cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warning_msgs.append((exc.message, ''))
#warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
else:
added_files[filename] = os.path.join(dir_to_open, filename)
if file_too_big:
# One file was too big. Removed all uploaded filed
for filepath in added_files.items():
try:
os.remove(filepath)
except:
# File was already removed or does not exist?
pass
client_ip_address = req.remote_ip
check_warnings = []
(ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln'])
if ok:
title, description, keywords = websearch_templates.tmpl_record_page_header_content(req,
self.recid,
argd['ln'])
navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid))
if navtrail:
navtrail += ' &gt; '
navtrail += '<a class="navtrail" href="%s/%s/%s?ln=%s">'% (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, argd['ln'])
navtrail += cgi.escape(title)
navtrail += '</a>'
navtrail += '&gt; <a class="navtrail" href="%s/%s/%s/%s/?ln=%s">%s</a>' % (CFG_SITE_URL,
CFG_SITE_RECORD,
self.recid,
self.discussion==1 and 'reviews' or 'comments',
argd['ln'],
self.discussion==1 and _('Reviews') or _('Comments'))
if argd['action'] not in actions:
argd['action'] = 'DISPLAY'
if not argd['msg']:
# User had to login in-between, so retrieve msg
# from cookie
try:
(kind, cookie_argd) = mail_cookie_check_common(argd['cookie'],
delete=True)
argd.update(cookie_argd)
except InvenioWebAccessMailCookieDeletedError, e:
return redirect_to_url(req, CFG_SITE_SECURE_URL + '/'+ CFG_SITE_RECORD +'/' + \
str(self.recid) + (self.discussion==1 and \
'/reviews' or '/comments'))
except InvenioWebAccessMailCookieError, e:
# Invalid or empty cookie: continue
pass
subscribe = False
if argd['subscribe'] and \
get_user_subscription_to_discussion(self.recid, uid) == 0:
# User is not already subscribed, and asked to subscribe
subscribe = True
body = perform_request_add_comment_or_remark(recID=self.recid,
ln=argd['ln'],
uid=uid,
action=argd['action'],
msg=argd['msg'],
note=argd['note'],
score=argd['score'],
reviews=self.discussion,
comID=argd['comid'],
client_ip_address=client_ip_address,
editor_type=argd['editor_type'],
can_attach_files=can_attach_files,
subscribe=subscribe,
req=req,
attached_files=added_files,
warnings=warning_msgs)
if self.discussion:
title = _("Add Review")
else:
title = _("Add Comment")
jqueryheader = '''
<script src="%(CFG_SITE_URL)s/js/jquery.MultiFile.pack.js" type="text/javascript" language="javascript"></script>
''' % {'CFG_SITE_URL': CFG_SITE_URL}
return page(title=title,
body=body,
navtrail=navtrail,
uid=uid,
language=CFG_SITE_LANG,
verbose=1,
req=req,
navmenuid='search',
metaheaderadd=jqueryheader)
# id not in range
else:
return page(title=_("Record Not Found"),
body=problem,
uid=uid,
verbose=1,
req=req,
navmenuid='search')
    def vote(self, req, form):
        """
        Vote positively or negatively for a comment/review.
        @param comid: comment/review id
        @param com_value: +1 to vote positively
                          -1 to vote negatively
        @param recid: the id of the record the comment/review is associated with
        @param ln: language
        @param do: display order    hh = highest helpful score, review only
                                    lh = lowest helpful score, review only
                                    hs = highest star score, review only
                                    ls = lowest star score, review only
                                    od = oldest date
                                    nd = newest date
        @param ds: display since    all= no filtering by date
                                    nd = n days ago
                                    nw = n weeks ago
                                    nm = n months ago
                                    ny = n years ago
                                    where n is a single digit integer between 0 and 9
        @param nb: number of results per page
        @param p: results page
        @param referer: http address of the calling function to redirect to (refresh)
        @param reviews: boolean, enabled for reviews, disabled for comments
        """
        argd = wash_urlargd(form, {'comid': (int, -1),
                                   'com_value': (int, 0),
                                   'recid': (int, -1),
                                   'do': (str, "od"),
                                   'ds': (str, "all"),
                                   'nb': (int, 100),
                                   'p': (int, 1),
                                   'referer': (str, None)
                                   })
        _ = gettext_set_language(argd['ln'])
        client_ip_address = req.remote_ip
        uid = getUid(req)
        user_info = collect_user_info(req)
        # Guests on restricted records go through login with a mail-cookie.
        (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid)
        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)
        # Check that comment belongs to this recid
        if not check_comment_belongs_to_record(argd['comid'], self.recid):
            return page_not_authorized(req, "../", \
                text = _("Specified comment does not belong to this record"))
        # Check that user can access the record
        (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
        if auth_code:
            return page_not_authorized(req, "../", \
                text = _("You do not have access to the specified comment"))
        # Check that comment is not currently deleted
        if is_comment_deleted(argd['comid']):
            return page_not_authorized(req, "../", \
                text = _("You cannot vote for a deleted comment"),
                ln=argd['ln'])
        success = perform_request_vote(argd['comid'], client_ip_address, argd['com_value'], uid)
        if argd['referer']:
            # Go back to the caller's page, carrying display state and the
            # vote outcome.
            argd['referer'] += "?ln=%s&do=%s&ds=%s&nb=%s&p=%s&voted=%s&" % (
                argd['ln'], argd['do'], argd['ds'], argd['nb'], argd['p'], success)
            redirect_to_url(req, argd['referer'])
        else:
            #Note: sent to comments display
            referer = "%s/%s/%s/%s?&ln=%s&voted=1"
            referer %= (CFG_SITE_SECURE_URL, CFG_SITE_RECORD, self.recid, self.discussion == 1 and 'reviews' or 'comments', argd['ln'])
            redirect_to_url(req, referer)
    def report(self, req, form):
        """
        Report a comment/review for inappropriate content
        @param comid: comment/review id
        @param recid: the id of the record the comment/review is associated with
        @param ln: language
        @param do: display order    hh = highest helpful score, review only
                                    lh = lowest helpful score, review only
                                    hs = highest star score, review only
                                    ls = lowest star score, review only
                                    od = oldest date
                                    nd = newest date
        @param ds: display since    all= no filtering by date
                                    nd = n days ago
                                    nw = n weeks ago
                                    nm = n months ago
                                    ny = n years ago
                                    where n is a single digit integer between 0 and 9
        @param nb: number of results per page
        @param p: results page
        @param referer: http address of the calling function to redirect to (refresh)
        @param reviews: boolean, enabled for reviews, disabled for comments
        """
        argd = wash_urlargd(form, {'comid': (int, -1),
                                   'recid': (int, -1),
                                   'do': (str, "od"),
                                   'ds': (str, "all"),
                                   'nb': (int, 100),
                                   'p': (int, 1),
                                   'referer': (str, None)
                                   })
        _ = gettext_set_language(argd['ln'])
        client_ip_address = req.remote_ip
        uid = getUid(req)
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid)
        # Unlike vote(), guests are redirected to login unconditionally here
        # (isGuestUser check, not auth_code + guest email).
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)
        # Check that comment belongs to this recid
        if not check_comment_belongs_to_record(argd['comid'], self.recid):
            return page_not_authorized(req, "../", \
                text = _("Specified comment does not belong to this record"))
        # Check that user can access the record
        (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
        if auth_code:
            return page_not_authorized(req, "../", \
                text = _("You do not have access to the specified comment"))
        # Check that comment is not currently deleted
        if is_comment_deleted(argd['comid']):
            return page_not_authorized(req, "../", \
                text = _("You cannot report a deleted comment"),
                ln=argd['ln'])
        success = perform_request_report(argd['comid'], client_ip_address, uid)
        if argd['referer']:
            argd['referer'] += "?ln=%s&do=%s&ds=%s&nb=%s&p=%s&reported=%s&" % (argd['ln'], argd['do'], argd['ds'], argd['nb'], argd['p'], str(success))
            redirect_to_url(req, argd['referer'])
        else:
            #Note: sent to comments display
            # NOTE(review): this fallback passes voted=1, not reported=1 —
            # looks like a copy-paste from vote(); confirm against what
            # display() expects before changing.
            referer = "%s/%s/%s/%s/display?ln=%s&voted=1"
            referer %= (CFG_SITE_SECURE_URL, CFG_SITE_RECORD, self.recid, self.discussion==1 and 'reviews' or 'comments', argd['ln'])
            redirect_to_url(req, referer)
    def subscribe(self, req, form):
        """
        Subscribe current user to receive email notification when new
        comments are added to current discussion.

        Guests are redirected to login first; on success the user is sent
        back to the comments display page with the subscription outcome.
        """
        # 'ln' is presumably injected by wash_urlargd's defaults — it is
        # read below without being declared here; TODO confirm.
        argd = wash_urlargd(form, {'referer': (str, None)})
        uid = getUid(req)
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)
        success = subscribe_user_to_discussion(self.recid, uid)
        display_url = "%s/%s/%s/comments/display?subscribed=%s&ln=%s" % \
                      (CFG_SITE_SECURE_URL, CFG_SITE_RECORD, self.recid, str(success), argd['ln'])
        redirect_to_url(req, display_url)
    def unsubscribe(self, req, form):
        """
        Unsubscribe current user from current discussion.

        Guests are redirected to login; otherwise the user is sent back to
        the comments display page with the (negated) outcome as the
        'subscribed' parameter.
        """
        argd = wash_urlargd(form, {'referer': (str, None)})
        user_info = collect_user_info(req)
        uid = getUid(req)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        success = unsubscribe_user_from_discussion(self.recid, uid)
        # -success signals "unsubscribed" to display() (subscribed=-1).
        display_url = "%s/%s/%s/comments/display?subscribed=%s&ln=%s" % \
                      (CFG_SITE_SECURE_URL, CFG_SITE_RECORD, self.recid, str(-success), argd['ln'])
        redirect_to_url(req, display_url)
    def toggle(self, req, form):
        """
        Store the visibility of a comment for current user

        @param comid: id of the comment to collapse/expand
        @param collapse: 1 to collapse, per the parameter's default
        @param referer: page to redirect back to, anchored at the comment
        """
        argd = wash_urlargd(form, {'comid': (int, -1),
                                   'referer': (str, None),
                                   'collapse': (int, 1)})
        uid = getUid(req)
        if isGuestUser(uid):
            # We do not store information for guests
            return ''
        toggle_comment_visibility(uid, argd['comid'], argd['collapse'], self.recid)
        if argd['referer']:
            return redirect_to_url(req, CFG_SITE_SECURE_URL + \
                                   (not argd['referer'].startswith('/') and '/' or '') + \
                                   argd['referer'] + '#' + str(argd['comid']))
        # NOTE(review): with no referer this falls through and implicitly
        # returns None — confirm callers (e.g. AJAX) tolerate an empty body.
class WebInterfaceCommentsFiles(WebInterfaceDirectory):
    """Handle <strike>upload and </strike> access to files for comments.

    <strike>The upload is currently only available through the Ckeditor.</strike>
    """
    #_exports = ['put'] # 'get' is handled by _lookup(..)

    def __init__(self, recid=-1, reviews=0):
        self.recid = recid
        self.discussion = reviews # 0:comments, 1:reviews

    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for getting
        <strike>and putting attachments</strike>) Eg:
        CFG_SITE_URL/CFG_SITE_RECORD/5953/comments/attachments/get/652/myfile.pdf
        """
        if component == 'get' and len(path) > 1:
            comid = path[0] # comment ID
            file_name = '/'.join(path[1:]) # the filename, which may itself contain '/'
            def answer_get(req, form):
                """Accessing files attached to comments."""
                # Closure injects the URL components as form parameters for _get.
                form['file'] = file_name
                form['comid'] = comid
                return self._get(req, form)
            return answer_get, []
        # All other cases: file not found
        return None, []
    def _get(self, req, form):
        """
        Returns a file attached to a comment.

        A file is attached to a comment, by a user (who is the author of
        the comment), and is then visible to users who can view the comment.
        Example:
        CFG_SITE_URL/CFG_SITE_RECORD/5953/comments/attachments/get/652/myfile.pdf
        where 652 is the comment ID
        """
        argd = wash_urlargd(form, {'file': (str, None),
                                   'comid': (int, 0)})
        _ = gettext_set_language(argd['ln'])
        # Can user view this record, i.e. can user access its
        # attachments?
        uid = getUid(req)
        user_info = collect_user_info(req)
        # Check that user can view record, and its comments (protected
        # with action "viewcomment")
        (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid)
        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)
        # Does comment exist?
        if not query_get_comment(argd['comid']):
            req.status = apache.HTTP_NOT_FOUND
            return page(title=_("Page Not Found"),
                        body=_('The requested comment could not be found'),
                        req=req)
        # Check that user can view this particular comment, protected
        # using its own restriction
        (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg,
                ln=argd['ln'])
        # Check that comment is not currently deleted
        if is_comment_deleted(argd['comid']):
            return page_not_authorized(req, "../", \
                text = _("You cannot access files of a deleted comment"),
                ln=argd['ln'])
        if not argd['file'] is None:
            # Prepare path to file on disk. Normalize the path so that
            # ../ and other dangerous components are removed.
            path = os.path.abspath(CFG_PREFIX + '/var/data/comments/' + \
                                   str(self.recid) + '/' + str(argd['comid']) + \
                                   '/' + argd['file'])
            # Check that we are really accessing attachements
            # directory, for the declared record.
            # (prefix check on the abspath defeats path-traversal attempts)
            if path.startswith(CFG_PREFIX + '/var/data/comments/' + \
                               str(self.recid)) and \
                   os.path.exists(path):
                return stream_file(req, path)
        # Send error 404 in all other cases
        req.status = apache.HTTP_NOT_FOUND
        return page(title=_("Page Not Found"),
                    body=_('The requested file could not be found'),
                    req=req,
                    language=argd['ln'])
class WebInterfaceYourCommentsPages(WebInterfaceDirectory):
    """Defines the set of /yourcomments pages."""

    _exports = ['', ]

    def index(self, req, form):
        """Index page.

        Lists the logged-in user's previously submitted comments, with
        paging, ordering and display-format options.
        """
        argd = wash_urlargd(form, {'page': (int, 1),
                                   'format': (str, "rc"),
                                   'order_by': (str, "lcf"),
                                   'per_page': (str, "all"),
                                   })
        # TODO: support also "reviews", by adding new option to show/hide them if needed
        uid = getUid(req)

        # load the right language
        _ = gettext_set_language(argd['ln'])

        # Is site ready to accept comments?
        if not CFG_WEBCOMMENT_ALLOW_COMMENTS or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return page_not_authorized(req, "%s/yourcomments" % \
                                       (CFG_SITE_SECURE_URL,),
                                       text="Comments are currently disabled on this site",
                                       navmenuid="yourcomments")
        elif uid == -1 or isGuestUser(uid):
            # Guests must log in first; keep current arguments in the
            # referer so the user comes back here.
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({
                    'referer' : "%s/yourcomments%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd(argd, {})),
                    "ln" : argd['ln']}, {})))

        user_info = collect_user_info(req)
        if not user_info['precached_sendcomments']:
            # Maybe we should still authorize if user submitted
            # comments in the past?
            return page_not_authorized(req, "../", \
                text = _("You are not authorized to use comments."))

        return page(title=_("Your Comments"),
                    body=perform_display_your_comments(user_info,
                                                       page_number=argd['page'],
                                                       selected_order_by_option=argd['order_by'],
                                                       selected_display_number_option=argd['per_page'],
                                                       selected_display_format_option=argd['format'],
                                                       ln=argd['ln']),
                    navtrail= """<a class="navtrail" href="%(sitesecureurl)s/youraccount/display?ln=%(ln)s">%(account)s</a>""" % {
                        'sitesecureurl' : CFG_SITE_SECURE_URL,
                        'ln': argd['ln'],
                        'account' : _("Your Account"),
                    },
                    description=_("%s View your previously submitted comments") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
                    keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(argd['ln'], CFG_SITE_NAME),
                    uid=uid,
                    language=argd['ln'],
                    req=req,
                    lastupdated=__lastupdated__,
                    navmenuid='youralerts',
                    secure_page_p=1)

    # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/
    __call__ = index
diff --git a/invenio/legacy/webjournal/adminlib.py b/invenio/legacy/webjournal/adminlib.py
index aba06db93..08711cbd7 100644
--- a/invenio/legacy/webjournal/adminlib.py
+++ b/invenio/legacy/webjournal/adminlib.py
@@ -1,967 +1,967 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=C0301
"""Invenio WebJournal Administration Interface."""
__revision__ = "$Id$"
import sys
import cPickle
import re
import os
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import \
CFG_SITE_URL, \
CFG_SITE_LANG, \
CFG_SITE_NAME, \
CFG_ETCDIR, \
CFG_CACHEDIR, \
CFG_TMPSHAREDDIR, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_SITE_RECORD
from invenio.base.i18n import gettext_set_language
from invenio.ext.email import send_email
from invenio.access_control_engine import acc_authorize_action
from invenio.webjournal_config import \
InvenioWebJournalJournalIdNotFoundDBError, \
InvenioWebJournalReleaseUpdateError, \
InvenioWebJournalNoJournalOnServerError
from invenio.webjournal_utils import \
get_journals_ids_and_names, \
guess_journal_name, \
get_current_issue, \
get_issue_number_display, \
get_featured_records, \
add_featured_record, \
remove_featured_record, \
clear_cache_for_issue, \
get_next_journal_issues, \
get_release_datetime, \
get_journal_id, \
compare_issues, \
get_journal_info_path, \
get_journal_css_url, \
get_journal_alert_sender_email, \
get_journal_alert_recipient_email, \
get_journal_draft_keyword_to_remove, \
get_journal_categories, \
get_journal_articles, \
get_grouped_issues, \
get_journal_issue_grouping, \
get_journal_languages, \
get_journal_collection_to_refresh_on_release, \
get_journal_index_to_refresh_on_release, \
issue_is_later_than, \
WEBJOURNAL_OPENER
from invenio.dbquery import run_sql
from invenio.legacy.bibrecord import \
create_record, \
print_rec
from invenio.modules.formatter import format_record
from invenio.bibtask import task_low_level_submission, bibtask_allocate_sequenceid
from invenio.search_engine import get_all_collections_of_a_record
import invenio.template
wjt = invenio.template.load('webjournal')
def getnavtrail(previous = ''):
    """Return the admin-area navigation trail, optionally extended with *previous*."""
    # The trail always starts with a link to the admin help area.
    base = """<a class="navtrail" href="%s/help/admin">Admin Area</a> """ % (CFG_SITE_URL,)
    return base + previous
def perform_index(ln=CFG_SITE_LANG, journal_name=None, action=None, uid=None):
    """
    Index page

    Lists the journals, and offers options to edit them, delete them
    or add new journal.

    Parameters:
        journal_name - the journal affected by action, if any
              action - one of ['', 'askDelete', _('Delete'), _('Cancel')]
                  ln - language
                 uid - user id
    """
    _ = gettext_set_language(ln)
    msg = None
    if action == 'askDelete' and journal_name is not None:
        # First step of deletion: show a confirmation form.  The actual
        # removal only happens once the user clicks the _("Delete") button.
        msg = '''<fieldset style="display:inline;margin-left:auto;margin-right:auto;">
<legend>Delete Journal Configuration</legend><span style="color:#f00">Are you sure you want to delete the configuration of %(journal_name)s?
<form action="%(CFG_SITE_URL)s/admin/webjournal/webjournaladmin.py">
<input type="hidden" name="journal_name" value="%(journal_name)s" />
<input class="formbutton" type="submit" name="action" value="%(delete)s" />
<input class="formbutton" type="submit" name="action" value="%(cancel)s" />
</form></span></fieldset>''' % {'CFG_SITE_URL': CFG_SITE_URL,
                                'journal_name': journal_name,
                                'delete': _("Delete"),
                                'cancel': _("Cancel")}
    if action == _("Delete") and journal_name is not None:
        # User confirmed and clicked on "Delete" button
        remove_journal(journal_name)
    journals = get_journals_ids_and_names()
    # Only keep journal that user can view or edit.  Each entry becomes
    # a (journal_info, has_editor_rights) pair for the template.
    journals = [(journal_info, acc_authorize_action(uid,
                                                    'cfgwebjournal',
                                                    name=journal_info['journal_name'],
                                                    with_editor_rights='yes')[0] == 0) \
                for journal_info in journals \
                if acc_authorize_action(uid,
                                        'cfgwebjournal',
                                        name=journal_info['journal_name'])[0] == 0]
    return wjt.tmpl_admin_index(ln=ln,
                                journals=journals,
                                msg=msg)
def perform_administrate(ln=CFG_SITE_LANG, journal_name=None,
                         as_editor=True):
    """
    Administration of a journal

    Show the current and next issues/publications, and display links
    to more specific administrative pages.

    Parameters:
        journal_name - the journal to be administrated
                  ln - language
           as_editor - True if can edit configuration. Read-only mode otherwise
    """
    if journal_name is None:
        # No journal given: try to pick a sensible default.
        try:
            journal_name = guess_journal_name(ln)
        except InvenioWebJournalNoJournalOnServerError, e:
            return e.user_box()
    if not can_read_xml_config(journal_name):
        return '<span style="color:#f00">Configuration could not be read. Please check that %s/webjournal/%s/%s-config.xml exists and can be read by the server.</span><br/>' % (CFG_ETCDIR, journal_name, journal_name)
    current_issue = get_current_issue(ln, journal_name)
    current_publication = get_issue_number_display(current_issue,
                                                   journal_name,
                                                   ln)
    issue_list = get_grouped_issues(journal_name, current_issue)
    # Propose the single issue that follows the last one of the group.
    next_issue_number = get_next_journal_issues(issue_list[-1], journal_name, 1)
    return wjt.tmpl_admin_administrate(journal_name,
                                       current_issue,
                                       current_publication,
                                       issue_list,
                                       next_issue_number[0],
                                       ln,
                                       as_editor=as_editor)
def perform_feature_record(journal_name,
                           recid,
                           img_url='',
                           action='',
                           ln=CFG_SITE_LANG):
    """
    Interface to feature a record

    Used to list, add and remove featured records of the journal.

    Parameters:
        journal_name - the journal for which the article is featured
               recid - the record affected by 'action'
             img_url - the URL to image displayed with given record
                       (only when action == 'add')
              action - One of ['', 'add', 'askremove', _('Remove'), _('Cancel')]
                  ln - language
    """
    _ = gettext_set_language(ln)
    if action == 'add':
        # Try to add the record; add_featured_record() returns 0 on
        # success, 1 if the record is already featured, other on error.
        result = add_featured_record(journal_name, recid, img_url)
        if result == 0:
            msg ='''<span style="color:#0f0">Successfully featured
<a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recid)s">record %(recid)s</a>.
Go to the <a href="%(CFG_SITE_URL)s/journal/%(name)s">%(name)s journal</a> to
see the result.</span>''' % {'CFG_SITE_URL': CFG_SITE_URL,
                             'CFG_SITE_RECORD': CFG_SITE_RECORD,
                             'name': journal_name,
                             'recid': recid}
        elif result == 1:
            msg = '''<span style="color:#f00"><a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recid)s">record %(recid)s</a> is already featured. Choose another one or remove it first.</span>''' % \
                  {'CFG_SITE_URL': CFG_SITE_URL,
                   'CFG_SITE_RECORD': CFG_SITE_RECORD,
                   'recid': recid}
        else:
            # Any other return value: the featured-records file could
            # not be written.
            msg = '''<span style="color:#f00">Record could not be featured. Check file permission.</span>'''
        featured_records = get_featured_records(journal_name)
        return wjt.tmpl_admin_feature_record(ln=ln,
                                             journal_name=journal_name,
                                             featured_records=featured_records,
                                             msg=msg)
    elif action == 'askremove':
        # First step of removal: ask confirmation before acting.
        msg = '''<fieldset style="display:inline;margin-left:auto;margin-right:auto;">
<legend>Remove featured record</legend><span style="color:#f00">Are you sure you want to remove <a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recid)s">record %(recid)s</a> from the list of featured record?
<form action="%(CFG_SITE_URL)s/admin/webjournal/webjournaladmin.py/feature_record">
<input type="hidden" name="journal_name" value="%(name)s" />
<input type="hidden" name="recid" value="%(recid)s" />
<input class="formbutton" type="submit" name="action" value="%(remove)s" />
<input class="formbutton" type="submit" name="action" value="%(cancel)s" />
</form></span></fieldset>''' % \
              {'CFG_SITE_URL': CFG_SITE_URL,
               'CFG_SITE_RECORD': CFG_SITE_RECORD,
               'name': journal_name,
               'recid': recid,
               'cancel': _("Cancel"),
               'remove': _("Remove")}
        featured_records = get_featured_records(journal_name)
        return wjt.tmpl_admin_feature_record(ln=ln,
                                             journal_name=journal_name,
                                             featured_records=featured_records,
                                             msg=msg)
    elif action == _("Remove"):
        # User confirmed: actually remove the record from the list.
        result = remove_featured_record(journal_name, recid)
        msg = '''<span style="color:#f00"><a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recid)s">Record %(recid)s</a>
has been removed.</span>''' % \
              {'CFG_SITE_URL': CFG_SITE_URL,
               'CFG_SITE_RECORD': CFG_SITE_RECORD,
               'recid': recid}
        featured_records = get_featured_records(journal_name)
        return wjt.tmpl_admin_feature_record(ln=ln,
                                             journal_name=journal_name,
                                             featured_records=featured_records,
                                             msg=msg)
    else:
        # Default: just list the currently featured records.
        msg = '''Here you can choose which records from the %s should
be featured on the journal webpage.''' % CFG_SITE_NAME
        featured_records = get_featured_records(journal_name)
        return wjt.tmpl_admin_feature_record(ln=ln,
                                             journal_name=journal_name,
                                             featured_records=featured_records,
                                             msg=msg)
def perform_regenerate_issue(issue,
                             journal_name,
                             ln=CFG_SITE_LANG,
                             confirmed_p=False,
                             publish_draft_articles_p=False):
    """
    Clear the cache of the given journal issue, asking the user for
    confirmation first.

    Parameters:
        journal_name             - journal whose issue cache is cleared
        issue                    - the issue whose cache is cleared
        ln                       - language
        confirmed_p              - if True, regenerate; otherwise show a
                                   confirmation page
        publish_draft_articles_p - if True (and the issue is already
                                   released) also make the remaining
                                   draft articles public
    """
    if not confirmed_p:
        # No confirmation yet: ask the user before touching the cache.
        current_issue = get_current_issue(ln, journal_name)
        already_released = not issue_is_later_than(issue, current_issue)
        return wjt.tmpl_admin_regenerate_confirm(ln,
                                                 journal_name,
                                                 issue,
                                                 already_released)
    # Confirmed: drop the cached pages for this issue.
    cache_cleared = clear_cache_for_issue(journal_name,
                                          issue)
    if publish_draft_articles_p:
        current_issue = get_current_issue(ln, journal_name)
        if not issue_is_later_than(issue, current_issue):
            # The issue is already released, so publishing the drafts is
            # safe; unreleased issues keep their drafts untouched.
            move_drafts_articles_to_ready(journal_name, issue)
    if cache_cleared:
        return wjt.tmpl_admin_regenerate_success(ln,
                                                 journal_name,
                                                 issue)
    return wjt.tmpl_admin_regenerate_error(ln,
                                           journal_name,
                                           issue)
def perform_request_issue_control(journal_name, issues,
                                  action, ln=CFG_SITE_LANG):
    """
    Central logic for issue control.

    Regenerates the flat files 'current_issue' and 'issue_group' of
    the journal that control which issue is currently active for the
    journal.

    Parameters:
        journal_name - the journal affected by 'action'
              issues - list of issues affected by 'action' TODO: check
              action - One of ['cfg', _('Add'), _('Refresh'),
                       _('Publish'), _('Update')]
                  ln - language
    """
    _ = gettext_set_language(ln)
    out = ''
    if action == "cfg" or action == _("Refresh") or action == _("Add"):
        # find out if we are in update or release
        current_issue = get_current_issue(ln, journal_name)
        grouped_issues = get_grouped_issues(journal_name, current_issue)
        if current_issue != grouped_issues[-1]:
            # The current issue has "pending updates", i.e. is grouped
            # with unreleased issues. Propose to update these issues
            next_issue = grouped_issues[grouped_issues.index(current_issue) + 1]
            out = wjt.tmpl_admin_update_issue(ln,
                                              journal_name,
                                              next_issue,
                                              current_issue)
        else:
            # Propose a release
            next_issues = get_next_journal_issues(current_issue,
                                                  journal_name,
                                                  n=get_journal_issue_grouping(journal_name))
            if action == _("Refresh"):
                # Keep the issues the user already selected.
                next_issues += issues
                next_issues = list(set(next_issues))# avoid double entries
            elif action == _("Add"):
                # Keep user's selection AND propose one extra issue
                # beyond the highest one so far.
                next_issues += issues
                next_issues = list(set(next_issues))# avoid double entries
                next_issues.sort(compare_issues)
                highest_issue_so_far = next_issues[-1]
                one_more_issue = get_next_journal_issues(highest_issue_so_far,
                                                         journal_name,
                                                         1)
                next_issues += one_more_issue
                next_issues = list(set(next_issues)) # avoid double entries
            else:
                # get the next issue numbers to publish
                next_issues = get_next_journal_issues(current_issue,
                                                      journal_name,
                                                      n=get_journal_issue_grouping(journal_name))
            next_issues.sort(compare_issues)
            out = wjt.tmpl_admin_control_issue(ln,
                                               journal_name,
                                               next_issues)
    elif action == _("Publish"):
        # Publish the given issues (mark them as current issues)
        publish_issues = issues
        publish_issues = list(set(publish_issues)) # avoid double entries
        publish_issues.sort(compare_issues)
        if len(publish_issues) == 0:
            # User did not select an issue
            current_issue = get_current_issue(ln, journal_name)
            next_issues = get_next_journal_issues(current_issue,
                                                  journal_name,
                                                  n=get_journal_issue_grouping(journal_name))
            out = '<p style="color:#f00;text-align:center">' + \
                  _('Please select an issue') + '</p>'
            out += wjt.tmpl_admin_control_issue(ln,
                                                journal_name,
                                                next_issues)
            return out
        try:
            release_journal_issue(publish_issues, journal_name, ln)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=None)
            return e.user_box()
        out = wjt.tmpl_admin_control_issue_success_msg(ln,
                                                       publish_issues,
                                                       journal_name)
    elif action == _("Update"):
        # Update an already-released issue.  The inner try maps a
        # missing selection (issues[0] fails) onto the dedicated error.
        try:
            try:
                update_issue = issues[0]
            except:
                raise InvenioWebJournalReleaseUpdateError(ln, journal_name)
        except InvenioWebJournalReleaseUpdateError, e:
            register_exception(req=None)
            return e.user_box()
        try:
            release_journal_update(update_issue, journal_name, ln)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=None)
            return e.user_box()
        out = wjt.tmpl_admin_updated_issue_msg(ln,
                                               update_issue,
                                               journal_name)
    return out
def perform_request_alert(journal_name, issue,
                          sent, plain_text, subject, recipients,
                          html_mail, force, ln=CFG_SITE_LANG):
    """
    All the logic for alert emails.

    Display a form to edit email/recipients and options to send the
    email. Sent in HTML/PlainText or only PlainText if wished so.
    Also prevent mistake of sending the alert more than one for a
    particular issue.

    Parameters:
        journal_name - the journal for which the alert is sent
               issue - the issue for which the alert is sent
                sent - Display interface to edit email if "False"
                       (string). Else send the email.
          plain_text - the text of the mail
             subject - the subject of the mail
          recipients - the recipients of the mail (string with
                       comma-separated emails)
           html_mail - if 'html', also send email as HTML (copying
                       from the current issue on the web)
               force - if different than "False", the email is sent
                       even if it has already been sent.
                  ln - language
    """
    # FIXME: more flexible options to choose the language of the alert
    languages = get_journal_languages(journal_name)
    if languages:
        alert_ln = languages[0]
    else:
        alert_ln = CFG_SITE_LANG
    if not get_release_datetime(issue, journal_name, ln):
        # Trying to send an alert for an unreleased issue
        return wjt.tmpl_admin_alert_unreleased_issue(ln,
                                                     journal_name)
    if sent == "False":
        # Retrieve default message, subject and recipients, and
        # display email editor
        subject = wjt.tmpl_admin_alert_subject(journal_name,
                                               alert_ln,
                                               issue)
        plain_text = wjt.tmpl_admin_alert_plain_text(journal_name,
                                                     alert_ln,
                                                     issue)
        plain_text = plain_text.encode('utf-8')
        recipients = get_journal_alert_recipient_email(journal_name)
        return wjt.tmpl_admin_alert_interface(ln,
                                              journal_name,
                                              subject,
                                              plain_text,
                                              recipients,
                                              alert_ln)
    else:
        # User asked to send the mail
        if was_alert_sent_for_issue(issue,
                                    journal_name,
                                    ln) != False and force == "False":
            # Mmh, email already sent before for this issue. Ask
            # confirmation
            return wjt.tmpl_admin_alert_was_already_sent(ln,
                                                         journal_name,
                                                         subject,
                                                         plain_text,
                                                         recipients,
                                                         html_mail,
                                                         issue)
        html_string = None
        if html_mail == "html":
            # Also send as HTML: retrieve from current issue, then
            # inline the CSS, add the journal link, and strip the
            # not-for-alert sections.
            html_file = WEBJOURNAL_OPENER.open('%s/journal/%s?ln=%s'
                                               % (CFG_SITE_URL, journal_name, alert_ln))
            html_string = html_file.read()
            html_file.close()
            html_string = put_css_in_file(html_string, journal_name)
            html_string = insert_journal_link(html_string, journal_name, issue, ln)
            html_string = wash_alert(html_string)
        sender_email = get_journal_alert_sender_email(journal_name)
        send_email(sender_email, recipients, subject, plain_text,
                   html_string, header='', footer='', html_header='',
                   html_footer='', charset='utf-8')
        # Remember that the alert was sent, so it is not re-sent by accident.
        update_DB_for_alert(issue, journal_name, ln)
        return wjt.tmpl_admin_alert_success_msg(ln,
                                                journal_name)
def perform_request_configure(journal_name, xml_config, action, ln=CFG_SITE_LANG):
    """
    Add a new journal or configure the settings of an existing journal.

    Parameters:
        journal_name - the journal to configure, or name of the new journal
          xml_config - the xml configuration of the journal (string)
              action - One of ['edit', 'editDone', 'add', 'addDone']
                  ln - language
    """
    msg = None
    if action == 'edit':
        # Read existing config
        if journal_name is not None:
            if not can_read_xml_config(journal_name):
                return '<span style="color:#f00">Configuration could not be read. Please check that %s/webjournal/%s/%s-config.xml exists and can be read by the server.</span><br/>' % (CFG_ETCDIR, journal_name, journal_name)
            config_path = '%s/webjournal/%s/%s-config.xml' % (CFG_ETCDIR, journal_name, journal_name)
            xml_config = file(config_path).read()
        else:
            # cannot edit unknown journal...
            return '<span style="color:#f00">You must specify a journal name</span>'
    if action in ['editDone', 'addDone']:
        # Save config.  On failure, 'action' is downgraded so the form
        # is shown again with an explanatory message.
        if action == 'addDone':
            res = add_journal(journal_name, xml_config)
            if res == -1:
                msg = '<span style="color:#f00">A journal with that name already exists. Please choose another name.</span>'
                action = 'add'
            elif res == -2:
                msg = '<span style="color:#f00">Configuration could not be written (no permission). Please manually copy your config to %s/webjournal/%s/%s-config.xml</span><br/>' % (CFG_ETCDIR, journal_name, journal_name)
                action = 'edit'
            elif res == -4:
                msg = '<span style="color:#f00">Cache file could not be written (no permission). Please manually create directory %s/webjournal/%s/ and make it writable for your Apache user</span><br/>' % (CFG_CACHEDIR, journal_name)
                action = 'edit'
            elif res > 0:
                msg = '<span style="color:#0f0">Journal successfully added.</span>'
                action = 'edit'
            else:
                msg = '<span style="color:#f00">An error occurred. The journal could not be added</span>'
                action = 'edit'
    if action == 'add':
        # Display a sample config.
        xml_config = '''<?xml version="1.0" encoding="UTF-8"?>
<webjournal name="AtlantisTimes">
<view>
<niceName>Atlantis Times</niceName>
<niceURL>%(CFG_SITE_URL)s</niceURL>
<css>
<screen>/img/AtlantisTimes.css</screen>
<print>/img/AtlantisTimes.css</print>
</css>
<format_template>
<index>AtlantisTimes_Index.bft</index>
<detailed>AtlantisTimes_Detailed.bft</detailed>
<search>AtlantisTimes_Search.bft</search>
<popup>AtlantisTimes_Popup.bft</popup>
<contact>AtlantisTimes_Contact.bft</contact>
</format_template>
</view>
<model>
<record>
<rule>News, 980__a:ATLANTISTIMESNEWS or 980__a:ATLANTISTIMESNEWSDRAFT</rule>
<rule>Science, 980__a:ATLANTISTIMESSCIENCE or 980__a:ATLANTISTIMESSCIENCEDRAFT</rule>
<rule>Arts, 980__a:ATLANTISTIMESARTS or 980__a:ATLANTISTIMESARTSDRAFT</rule>
</record>
</model>
<controller>
<issue_grouping>2</issue_grouping>
<issues_per_year>52</issues_per_year>
<hide_unreleased_issues>all</hide_unreleased_issues>
<marc_tags>
<issue_number>773__n</issue_number>
<order_number>773__c</order_number>
</marc_tags>
<alert_sender>%(CFG_SITE_SUPPORT_EMAIL)s</alert_sender>
<alert_recipients>recipients@atlantis.atl</alert_recipients>
<languages>en,fr</languages>
<submission>
<doctype>DEMOJRN</doctype>
<report_number_field>DEMOJRN_RN</report_number_field>
</submission>
<first_issue>02/2009</first_issue>
<draft_keyword>DRAFT</draft_keyword>
</controller>
</webjournal>''' % {'CFG_SITE_URL': CFG_SITE_URL,
                    'CFG_SITE_SUPPORT_EMAIL': CFG_SITE_SUPPORT_EMAIL}
    out = wjt.tmpl_admin_configure_journal(ln=ln,
                                           journal_name=journal_name,
                                           xml_config=xml_config,
                                           action=action,
                                           msg=msg)
    return out
######################## ADDING/REMOVING JOURNALS ###############################
def add_journal(journal_name, xml_config):
    """
    Add a new journal to the DB. Also create the configuration file

    Parameters:
        journal_name - the name (used in URLs) of the new journal
          xml_config - the xml configuration of the journal (string)
    Returns:
         the id of the journal if successfully added
         -1 if could not be added because journal name already exists
         -2 if config could not be saved
         -3 if could not be added for other reasons
         -4 if database cache could not be added
    """
    try:
        get_journal_id(journal_name)
    except InvenioWebJournalJournalIdNotFoundDBError:
        # Perfect, journal does not exist
        res = run_sql("INSERT INTO jrnJOURNAL (name) VALUES(%s)", (journal_name,))
        # Also save xml_config
        config_dir = '%s/webjournal/%s/' % (CFG_ETCDIR, journal_name)
        try:
            if not os.path.exists(config_dir):
                os.makedirs(config_dir)
            # open() instead of the deprecated file() builtin; close the
            # handle even if the write fails.
            xml_config_file = open(config_dir + journal_name + '-config.xml', 'w')
            try:
                xml_config_file.write(xml_config)
            finally:
                xml_config_file.close()
        except Exception:
            res = -2
        # And save some info in file in case database is down
        journal_info_path = get_journal_info_path(journal_name)
        journal_info_dir = os.path.dirname(journal_info_path)
        try:
            if not os.path.exists(journal_info_dir):
                os.makedirs(journal_info_dir)
            journal_info_file = open(journal_info_path, 'w')
            try:
                cPickle.dump({'journal_id': res,
                              'journal_name': journal_name,
                              'current_issue': '01/2000'}, journal_info_file)
            finally:
                # The original leaked this handle; close it so the cache
                # file is actually flushed to disk.
                journal_info_file.close()
        except Exception:
            # Fallback cache could not be written (e.g. no permission).
            # The original only set -4 when makedirs failed and then
            # crashed with an uncaught IOError at open(); report -4 in
            # every cache-write failure instead.
            res = -4
        return res
    return -1
def remove_journal(journal_name):
    """
    Remove a journal from the DB. Does not completely remove
    everything, in case it was an error from the editor..

    Only the row in jrnJOURNAL is deleted; the configuration file and
    cache on disk are left in place so the journal can be restored.

    Parameters:
        journal_name - the journal to remove

    Returns:
        None.  (NOTE(review): the previous docstring promised an id or
        error code, but the function never returned anything.)
    """
    run_sql("DELETE FROM jrnJOURNAL WHERE name=%s", (journal_name,))
######################## TIME / ISSUE FUNCTIONS ###############################
def release_journal_issue(publish_issues, journal_name, ln=CFG_SITE_LANG):
    """
    Releases a new issue.

    This sets the current issue in the database to 'publish_issues' for
    given 'journal_name'

    Parameters:
         journal_name - the journal for which we release a new issue
       publish_issues - the list of issues that will be considered as
                        current (there can be several)
                   ln - language
    """
    journal_id = get_journal_id(journal_name, ln)
    if len(publish_issues) > 1:
        publish_issues.sort(compare_issues)
        low_bound = publish_issues[0]
        high_bound = publish_issues[-1]
        issue_display = '%s-%s/%s' % (low_bound.split("/")[0],
                                      high_bound.split("/")[0],
                                      high_bound.split("/")[1])
        # remember convention: if we are going over a new year, take the higher
    else:
        issue_display = publish_issues[0]
    # produce the DB lines
    for publish_issue in publish_issues:
        move_drafts_articles_to_ready(journal_name, publish_issue)
        run_sql("INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display) \
                VALUES(%s, %s, %s)", (journal_id,
                                      publish_issue,
                                      issue_display))
    # set first issue to published
    release_journal_update(publish_issues[0], journal_name, ln)
    # update information in file (in case DB is down)
    journal_info_path = get_journal_info_path(journal_name)
    journal_info_file = open(journal_info_path, 'w')
    try:
        cPickle.dump({'journal_id': journal_id,
                      'journal_name': journal_name,
                      'current_issue': get_current_issue(ln, journal_name)},
                     journal_info_file)
    finally:
        # Close explicitly: the original leaked this handle, so the
        # cache file could stay unflushed until interpreter exit.
        journal_info_file.close()
def delete_journal_issue(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Deletes an issue from the DB.
    (Not currently used)
    """
    journal_id = get_journal_id(journal_name, ln)
    run_sql("DELETE FROM jrnISSUE WHERE issue_number=%s \
            AND id_jrnJOURNAL=%s",(issue, journal_id))
    # update information in file (in case DB is down)
    journal_info_path = get_journal_info_path(journal_name)
    journal_info_file = open(journal_info_path, 'w')
    try:
        cPickle.dump({'journal_id': journal_id,
                      'journal_name': journal_name,
                      'current_issue': get_current_issue(ln, journal_name)},
                     journal_info_file)
    finally:
        # Close explicitly: the original leaked this handle.
        journal_info_file.close()
def was_alert_sent_for_issue(issue, journal_name, ln):
    """
    Returns False if alert has not already been sent for given journal and
    issue, else returns time of last alert, as time tuple

    Parameters:
        journal_name - the journal for which we want to check last alert
               issue - the issue for which we want to check last alert
                  ln - language
    Returns:
        time tuple or False. Eg: (2008, 4, 25, 7, 58, 37, 4, 116, -1)
    """
    journal_id = get_journal_id(journal_name, ln)
    res = run_sql("SELECT date_announced FROM jrnISSUE \
                   WHERE issue_number=%s \
                   AND id_jrnJOURNAL=%s", (issue, journal_id))
    # Guard against a missing row (the original indexed [0][0]
    # unconditionally and raised IndexError for unknown issues), and use
    # 'is None' identity comparison for the NULL check instead of '== None'.
    if not res or res[0][0] is None:
        return False
    return res[0][0].timetuple()
def update_DB_for_alert(issue, journal_name, ln):
    """
    Record that the alert e-mail for the given journal issue has just
    been sent, by stamping its DB row with the current time.

    Parameters:
        journal_name - the journal whose issue row is stamped
               issue - the issue whose 'date_announced' is updated
                  ln - language
    """
    # Resolve the journal name to its numeric id, then stamp the row.
    journal_id = get_journal_id(journal_name, ln)
    run_sql("UPDATE jrnISSUE set date_announced=NOW() \
             WHERE issue_number=%s \
             AND id_jrnJOURNAL=%s", (issue, journal_id))
def release_journal_update(update_issue, journal_name, ln=CFG_SITE_LANG):
    """
    Releases an update to a journal: publishes the remaining draft
    articles of the issue and stamps its DB row with the release time.
    """
    # Drafts must become public as part of the release.
    move_drafts_articles_to_ready(journal_name, update_issue)
    journal_id = get_journal_id(journal_name, ln)
    run_sql("UPDATE jrnISSUE set date_released=NOW() \
             WHERE issue_number=%s \
             AND id_jrnJOURNAL=%s", (update_issue, journal_id))
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata
    """
    # Tags that must never be copied into the public record.
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}
    indexes_to_refresh = get_journal_index_to_refresh_on_release(journal_name)
    bibindex_indexes_params = []
    if indexes_to_refresh:
        bibindex_indexes_params = ['-w', ','.join(indexes_to_refresh)]
    categories = get_journal_categories(journal_name, issue)
    # All submitted tasks share one sequence id so they run in order.
    task_sequence_id = str(bibtask_allocate_sequenceid())
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        for order, recids in articles.iteritems():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPSHAREDDIR,
                                                   'webjournal_publish_' + \
                                                   str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(record,
                                                          protected_datafields,
                                                          keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if new_record_xml.find(keyword_to_remove) >= 0:
                    # Strip the draft keyword and upload/re-index the record.
                    new_record_xml = new_record_xml.replace(keyword_to_remove, '')
                    # Write to file
                    new_record_xml_file = file(new_record_xml_path, 'w')
                    new_record_xml_file.write(new_record_xml)
                    new_record_xml_file.close()
                    # Submit
                    task_low_level_submission('bibupload',
                                              'WebJournal',
                                              '-c', new_record_xml_path,
                                              '-I', task_sequence_id)
                    task_low_level_submission('bibindex',
                                              'WebJournal',
                                              '-i', str(recid),
                                              '-I', task_sequence_id,
                                              *bibindex_indexes_params)
                for collection in get_all_collections_of_a_record(recid):
                    collections_to_refresh[collection] = ''
    # Refresh collections
    collections_to_refresh.update([(c, '') for c in get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh.keys():
        task_low_level_submission('webcoll',
                                  'WebJournal',
                                  '-f', '-P', '2', '-p', '1', '-c', collection,
                                  '-I', task_sequence_id)
def update_draft_record_metadata(record, protected_datafields, keyword_to_remove):
    """
    Returns a new record with fields that should be modified in order
    for this draft record to be considered as 'ready': keep only
    controlfield 001 and non-protected fields that contain the
    'keyword_to_remove'.

    Parameters:
                      record - a single record (as BibRecord structure)
        protected_datafields - *list* tags that should not be part of the
                               returned record
           keyword_to_remove - *str* keyword that should be considered
                               when checking if a field should be part of
                               the returned record.
    """
    new_record = {}
    # .items() instead of the Python-2-only .iteritems(), and the
    # original double-negative keep-condition rewritten positively.
    for tag, field in record.items():
        if tag in protected_datafields:
            # Protected tags are never carried over to the public record.
            continue
        if keyword_to_remove in str(field) or tag == '001':
            # Keep the record id and any field carrying the draft
            # keyword (the keyword itself is stripped by the caller).
            new_record[tag] = field
    return new_record
######################## XML CONFIG ###############################
def can_read_xml_config(journal_name):
    """
    Check that the configuration XML for the given journal name exists
    and can be read.

    Parameters:
        journal_name - the journal whose configuration is checked
    Returns:
        True if <journal_name>-config.xml is readable, False otherwise
    """
    config_path = '%s/webjournal/%s/%s-config.xml' % \
                  (CFG_ETCDIR, journal_name, journal_name)
    try:
        # open() instead of the deprecated file() builtin, and close the
        # handle explicitly (the original leaked it).
        config_file = open(config_path)
        try:
            config_file.read()
        finally:
            config_file.close()
    except IOError:
        return False
    return True
######################## EMAIL HELPER FUNCTIONS ###############################
# Matches everything between the START_NOT_FOR_ALERT / END_NOT_FOR_ALERT
# marker comments (markers included), across line breaks.  Raw string: the
# original pattern used '<\!--' where the backslash was a spurious escape
# ('!' needs no escaping in a regex); semantics are unchanged.
CFG_WEBJOURNAL_ALERT_WASH_PATTERN = re.compile(r'<!--\s*START_NOT_FOR_ALERT\s*-->.*?<!--\s*END_NOT_FOR_ALERT\s*-->', re.MULTILINE | re.DOTALL)
def wash_alert(html_string):
    """
    Remove from alert any content in-between tags <!--START_NOT_FOR_ALERT--> and
    <!--END_NOT_FOR_ALERT-->
    @param html_string: the HTML newsletter
    """
    return CFG_WEBJOURNAL_ALERT_WASH_PATTERN.sub('', html_string)
def insert_journal_link(html_string, journal_name, issue, ln):
    """
    Insert a warning regarding HTML formatting inside mail client and a
    link to the journal page just after the opening <body> tag.
    @param html_string: the HTML newsletter
    @param journal_name: the journal name
    @param issue: journal issue for which the alert is sent (in the form number/year)
    @param ln: language
    """
    def append_header(match):
        # Keep the matched <body ...> tag and append the alert header.
        return match.group() + wjt.tmpl_admin_alert_header_html(journal_name,
                                                                ln, issue)
    # Only the first <body> tag is rewritten (count=1).
    return re.sub('<body.*?>', append_header, html_string, 1)
def put_css_in_file(html_message, journal_name):
    """
    Retrieve the CSS of the journal and insert/inline it in the <head>
    section of the given html_message. (Used for HTML alert emails)

    Parameters:
        journal_name - the journal name
        html_message - the html message (string) in which the CSS
                       should be inserted
    Returns:
        the HTML message with its CSS inlined.
        NOTE(review): returns None when the journal has no CSS or when
        the message contains neither '</head>' nor '<html>' -- callers
        should be prepared for that.
    """
    css_path = get_journal_css_url(journal_name)
    if not css_path:
        return
    css_file = WEBJOURNAL_OPENER.open(css_path)
    css = css_file.read()
    css = make_full_paths_in_css(css, journal_name)
    # Prefer inserting just before the existing </head>.
    # NOTE(review): split() without maxsplit drops content after a
    # second '</head>' occurrence, if any -- confirm inputs have one.
    html_parted = html_message.split("</head>")
    if len(html_parted) > 1:
        html = '%s<style type="text/css">%s</style></head>%s' % (html_parted[0],
                                                                 css,
                                                                 html_parted[1])
    else:
        # No </head>: synthesize a <head> right after the <html> tag.
        html_parted = html_message.split("<html>")
        if len(html_parted) > 1:
            html = '%s<html><head><style type="text/css">%s</style></head>%s' % (html_parted[0],
                                                                                 css,
                                                                                 html_parted[1])
        else:
            return
    return html
def make_full_paths_in_css(css, journal_name):
    """
    Update the URLs in a CSS from relative to absolute URLs, so that the
    URLs are accessible from anywhere (Used for HTML alert emails)

    Parameters:
        journal_name - the journal name
                 css - a cascading stylesheet (string)
    Returns:
        (str) the given css with relative paths converted to absolute paths
    """
    url_pattern = re.compile(r'''url\(["']?\s*(?P<url>\S*)\s*["']?\)''',
                             re.DOTALL)
    rel_to_full_path = {}
    for match in url_pattern.finditer(css):
        url_string = match.group("url")
        url_string = url_string.replace('"', "")
        url_string = url_string.replace("'", "")
        # Leave absolute URLs untouched.  BUGFIX: the original compared
        # url_string[:6] (6 chars) against "http://" (7 chars), which can
        # never be equal, so *every* URL -- absolute ones included -- was
        # rewritten.  startswith() implements the intended check, and
        # https URLs are skipped for the same reason.
        if not url_string.startswith(('http://', 'https://')):
            rel_to_full_path[url_string] = '"%s/img/webjournal_%s/%s"' % \
                (CFG_SITE_URL,
                 journal_name,
                 url_string)
    for rel_url, full_url in rel_to_full_path.items():
        css = css.replace(rel_url, full_url)
    return css
diff --git a/invenio/legacy/webjournal/api.py b/invenio/legacy/webjournal/api.py
index be42f5200..4546833ec 100644
--- a/invenio/legacy/webjournal/api.py
+++ b/invenio/legacy/webjournal/api.py
@@ -1,271 +1,271 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
WebJournal - Main Public interface of the WebJournals
"""
import datetime
import time
from invenio.modules.formatter.engine import \
BibFormatObject, \
format_with_format_template
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import \
CFG_SITE_URL, \
CFG_ACCESS_CONTROL_LEVEL_SITE
from invenio.utils.url import redirect_to_url
from invenio.webuser import collect_user_info
from invenio.webjournal_config import \
InvenioWebJournalTemplateNotFoundError
from invenio.webjournal_utils import \
get_article_page_from_cache, \
cache_article_page, \
get_current_issue, \
get_journal_template, \
get_release_datetime, \
get_journal_articles, \
get_unreleased_issue_hiding_mode, \
issue_is_later_than, \
datetime_to_issue
def perform_request_index(req, journal_name, issue_number, ln,
                          category, editor=False, verbose=0):
    """
    Central logic function for index pages.
    Brings together format templates and MARC rules from the config, with
    the requested index page, given by the url parameters.
    From config:
    - page template for index pages -> formatting
    - MARC rule list -> Category Navigation
    - MARC tag used for issue numbers -> search (later in the format
    elements)
    Uses BibFormatObject and format_with_format_template to produce the
    required HTML.

    :param req: the request object (passed through to templates / redirects)
    :param journal_name: (str) journal name as used in URLs
    :param issue_number: (str) requested issue, e.g. '08/2007'
    :param ln: (str) language code
    :param category: (str) requested category (unused here; kept for the
                     common signature shared by the request handlers)
    :param editor: (bool) True for editors/admins (disables the redirect
                   away from unreleased issues and raises verbosity)
    :param verbose: (int) verbosity forwarded to the formatter for editors
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and \
               (unreleased_issues_mode == 'all' or \
                (unreleased_issues_mode == 'future' and \
                 issue_is_later_than(issue_number, current_issue))):
            # Non-editors are bounced to the latest released issue
            # (URL is .../<year>/<number>, hence the reversed split order)
            redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                            (CFG_SITE_URL,
                             journal_name,
                             current_issue.split('/')[1],
                             current_issue.split('/')[0],
                             ln))
    try:
        index_page_template = get_journal_template('index',
                                                   journal_name,
                                                   ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box(req)
    # Minimal dummy record: the index page is driven by the template and
    # its format elements, not by a real record
    temp_marc = '''<record>
<controlfield tag="001">0</controlfield>
</record>'''
    # create a record and get HTML back from bibformat
    user_info = collect_user_info(req)
    bfo = BibFormatObject(0, ln=ln, xml_record=temp_marc,
                          user_info=user_info)
    bfo.req = req
    verbosity = 0
    if editor:
        # Increase verbosity only for editors/admins
        verbosity = verbose
    html = format_with_format_template(index_page_template,
                                       bfo,
                                       verbose=verbosity)
    return html
def perform_request_article(req, journal_name, issue_number, ln,
                            category, recid, editor=False, verbose=0):
    """
    Central logic function for article pages.
    Loads the format template for article display and displays the requested
    article using BibFormat.
    'Editor' mode generates edit links on the article view page and disables
    caching.

    :param recid: (int) record id of the article to display
    :param editor: (bool) True for editors/admins: skips the cache (read
                   and write) and allows viewing unreleased issues
    """
    current_issue = get_current_issue(ln, journal_name)
    if not get_release_datetime(issue_number, journal_name):
        # Unreleased issue. Display latest released issue?
        unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
        if not editor and \
               (unreleased_issues_mode == 'all' or \
                (unreleased_issues_mode == 'future' and \
                 issue_is_later_than(issue_number, current_issue))):
            redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                            (CFG_SITE_URL,
                             journal_name,
                             current_issue.split('/')[1],
                             current_issue.split('/')[0],
                             ln))
    try:
        index_page_template = get_journal_template('detailed',
                                                   journal_name,
                                                   ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box(req)
    user_info = collect_user_info(req)
    bfo = BibFormatObject(recid, ln=ln, user_info=user_info)
    bfo.req = req
    # if it is cached, return it
    cached_html = get_article_page_from_cache(journal_name, category,
                                              recid, issue_number, ln,
                                              bfo)
    if cached_html and not editor:
        return cached_html
    # Check that this recid is indeed an article
    # (i.e. appears in the issue/category article listing)
    is_article = False
    articles = get_journal_articles(journal_name, issue_number, category)
    for order, recids in articles.iteritems():
        if recid in recids:
            is_article = True
            break
    if not is_article:
        # Not an article of this issue/category: bounce to the issue index
        redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % \
                        (CFG_SITE_URL,
                         journal_name,
                         issue_number.split('/')[1],
                         issue_number.split('/')[0],
                         ln))
    # create a record and get HTML back from bibformat
    verbosity = 0
    if editor:
        # Increase verbosity only for editors/admins
        verbosity = verbose
    html_out = format_with_format_template(index_page_template,
                                           bfo,
                                           verbose=verbosity)
    # cache if not in editor mode, and if database is not down
    # (CFG_ACCESS_CONTROL_LEVEL_SITE == 2 means read-only site)
    if not editor and not CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        cache_article_page(html_out, journal_name, category,
                           recid, issue_number, ln)
    return html_out
def perform_request_contact(req, ln, journal_name, verbose=0):
"""
Display contact information
"""
try:
contact_page_template = get_journal_template('contact',
journal_name,
ln)
except InvenioWebJournalTemplateNotFoundError, e:
register_exception(req=req)
return e.user_box(req)
user_info = collect_user_info(req)
temp_marc = '''<record>
<controlfield tag="001">0</controlfield>
</record>'''
bfo = BibFormatObject(0,
ln=ln,
xml_record=temp_marc,
user_info=user_info)
bfo.req = req
html = format_with_format_template(contact_page_template,
bfo)
return html
def perform_request_popup(req, ln, journal_name, record):
"""
Display the popup window
"""
try:
popup_page_template = get_journal_template('popup',
journal_name,
ln)
except InvenioWebJournalTemplateNotFoundError, e:
register_exception(req=req)
return e.user_box(req)
user_info = collect_user_info(req)
bfo = BibFormatObject(record, ln=ln, user_info=user_info)
bfo.req = req
html = format_with_format_template(popup_page_template,
bfo)
return html
def perform_request_search(req, journal_name, ln,
                           archive_issue, archive_select,
                           archive_date, archive_search, verbose=0):
    """
    Logic for the search / archive page.

    Three modes, driven by the submitted form values:
    - neither 'select' nor 'search' submitted: render the search page
    - archive_select == "Go": go to the issue picked in the drop-down
    - archive_search == "Go": resolve a dd/mm/yyyy date to an issue and
      go there (falling back to the current issue on bad input)
    """
    try:
        search_page_template = get_journal_template('search',
                                                    journal_name,
                                                    ln)
    except InvenioWebJournalTemplateNotFoundError, e:
        register_exception(req=req)
        return e.user_box(req)
    if archive_select == "False" and archive_search == "False":
        # Plain display of the search/archive page (no form submitted yet)
        temp_marc = '''<record>
<controlfield tag="001">0</controlfield>
</record>'''
        user_info = collect_user_info(req)
        bfo = BibFormatObject(0,
                              ln=ln,
                              xml_record=temp_marc,
                              user_info=user_info)
        bfo.req = req
        html = format_with_format_template(search_page_template,
                                           bfo,
                                           verbose=verbose)
        return html
    elif archive_select == "Go":
        # Issue chosen from the drop-down: redirect to .../<year>/<number>
        redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % (CFG_SITE_URL,
                                                            journal_name,
                                                            archive_issue.split('/')[1],
                                                            archive_issue.split('/')[0],
                                                            ln))
    elif archive_search == "Go":
        try:
            # Parse the dd/mm/yyyy date and map it to the matching issue
            archive_issue_time = datetime.datetime(*time.strptime(archive_date, "%d/%m/%Y")[0:5])
            archive_issue = datetime_to_issue(archive_issue_time, journal_name)
            if not archive_issue:
                archive_issue = get_current_issue(ln, journal_name)
        except ValueError:
            # Unparsable date: fall back to the current issue
            archive_issue = get_current_issue(ln, journal_name)
        redirect_to_url(req, "%s/journal/%s/%s/%s?ln=%s" % (CFG_SITE_URL,
                                                            journal_name,
                                                            archive_issue.split('/')[1],
                                                            archive_issue.split('/')[0],
                                                            ln))
diff --git a/invenio/legacy/webjournal/utils.py b/invenio/legacy/webjournal/utils.py
index 5491167f9..a6c4e5bb1 100644
--- a/invenio/legacy/webjournal/utils.py
+++ b/invenio/legacy/webjournal/utils.py
@@ -1,1809 +1,1809 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Various utilities for WebJournal, e.g. config parser, etc.
"""
import time
import datetime
import calendar
import re
import os
import cPickle
import math
import urllib
from MySQLdb import OperationalError
from xml.dom import minidom
from urlparse import urlparse
from invenio.config import \
CFG_ETCDIR, \
CFG_SITE_URL, \
CFG_CACHEDIR, \
CFG_SITE_LANG, \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_DEVEL_SITE, \
CFG_CERN_SITE
from invenio.dbquery import run_sql
from invenio.modules.formatter.engine import BibFormatObject
from invenio.search_engine import search_pattern, record_exists
from invenio.base.i18n import gettext_set_language
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.utils.url import make_invenio_opener
WEBJOURNAL_OPENER = make_invenio_opener('WebJournal')
########################### REGULAR EXPRESSIONS ######################
# Article header marked up as <p><strong>...</strong></p> (optionally
# with align=justify)
header_pattern = re.compile('<p\s*(align=justify)??>\s*<strong>(?P<header>.*?)</strong>\s*</p>')
# Article header marked up with class="articleHeader"
header_pattern2 = re.compile('<p\s*(class="articleHeader").*?>(?P<header>.*?)</p>')
# Any paragraph content (non-greedy, across newlines)
para_pattern = re.compile('<p.*?>(?P<paragraph>.+?)</p>', re.DOTALL)
# src attribute of an <img .../> tag, quotes optional
img_pattern = re.compile('<img.*?src=("|\')?(?P<image>\S+?)("|\'|\s).*?/>', re.DOTALL)
# A centered image (optionally hyperlinked) followed by an optional
# bold-italic centered caption. VERBOSE pattern: the '#' comments below
# are part of the regex source, not executable Python comments.
image_pattern = re.compile(r'''
(<a\s*href=["']?(?P<hyperlink>\S*)["']?>)?# get the link location for the image
\s*# after each tag we can have arbitrary whitespaces
<center># the image is always centered
\s*
<img\s*(class=["']imageScale["'])*?\s*src=(?P<image>\S*)\s*border=1\s*(/)?># getting the image itself
\s*
</center>
\s*
(</a>)?
(<br />|<br />|<br/>)*# the caption can be separated by any nr of line breaks
(
<b>
\s*
<i>
\s*
<center>(?P<caption>.*?)</center># getting the caption
\s*
</i>
\s*
</b>
)?''', re.DOTALL | re.VERBOSE | re.IGNORECASE )
#'
############################## FEATURED RECORDS ######################
def get_featured_records(journal_name):
    """
    Returns the 'featured' records i.e. records chosen to be displayed
    with an image on the main page, in the widgets section, for the
    given journal.

    parameter:

        journal_name - (str) the name of the journal for which we want
                             to get the featured records

    returns:
        list of tuples (recid, img_url). Lines of the file without the
        '---' separator are skipped. Returns [] if the file is missing.
    """
    try:
        feature_file = open('%s/webjournal/%s/featured_record' % \
                            (CFG_ETCDIR, journal_name))
    except:
        # Best-effort: no featured-record file means no featured records
        return []
    try:
        records = feature_file.readlines()
    finally:
        # FIX: the original never closed the file handle
        feature_file.close()
    featured = []
    for record in records:
        if "---" in record:
            # Split once instead of twice as in the original
            recid, img_url = record.split('---', 1)
            featured.append((recid, img_url))
    return featured
def add_featured_record(journal_name, recid, img_url):
    """
    Adds the given record to the list of featured records of the given
    journal.

    parameters:
        journal_name - (str) the name of the journal to which the record
                             should be added.
               recid - (int) the record id of the record to be featured.
             img_url - (str) a url to an image icon displayed along the
                             featured record.
    returns:
          0 if everything went ok
          1 if record is already in the list
          2 if other problems
    """
    # Refuse duplicates: bail out if the record is already featured
    for existing_recid, dummy_img in get_featured_records(journal_name):
        if existing_recid == str(recid):
            return 1
    try:
        fptr = open('%s/webjournal/%s/featured_record'
                    % (CFG_ETCDIR, journal_name), "a")
        fptr.write(str(recid) + '---' + img_url + '\n')
        fptr.close()
    except:
        return 2
    return 0
def remove_featured_record(journal_name, recid):
    """
    Removes the given record from the list of featured records of the
    given journal.

    parameters:
        journal_name - (str) the name of the journal to which the record
                             should be added.
               recid - (int) the record id of the record to be featured.

    Returns 0 on success, 1 if the list could not be rewritten.
    """
    # Read the current list first, then rewrite it without the given recid
    kept_records = [(kept_recid, kept_img) for (kept_recid, kept_img)
                    in get_featured_records(journal_name)
                    if str(kept_recid) != str(recid)]
    try:
        fptr = open('%s/webjournal/%s/featured_record'
                    % (CFG_ETCDIR, journal_name), "w")
        for kept_recid, kept_img in kept_records:
            fptr.write(str(kept_recid) + '---' + kept_img + \
                       '\n')
        fptr.close()
    except:
        return 1
    return 0
############################ ARTICLES RELATED ########################
def get_order_dict_from_recid_list(recids, journal_name, issue_number,
                                   newest_first=False,
                                   newest_only=False):
    """
    Returns the ordered list of input recids, for given
    'issue_number'.

    Since there might be several articles at the same position, the
    returned structure is a dictionary with keys being order number
    indicated in record metadata, and values being list of recids for
    this order number (recids for one position are ordered from
    highest to lowest recid).

    Eg: {'1': [2390, 2386, 2385],
         '3': [2388],
         '2': [2389],
         '4': [2387]}

    Parameters:

             recids - a list of all recid's that should be brought
                      into order
       journal_name - the name of the journal
       issue_number - *str* the issue_number for which we are
                      deriving the order
       newest_first - *bool* if True, new articles should be placed
                      at beginning of the list. If so, their
                      position/order will be negative integers
        newest_only - *bool* if only new articles should be returned

    Returns:

        ordered_records: a dictionary with the recids ordered by
                         keys
    """
    ordered_records = {}
    ordered_new_records = {}
    records_without_defined_order = []
    new_records_without_defined_order = []
    for record in recids:
        temp_rec = BibFormatObject(record)
        articles_info = temp_rec.fields('773__')
        for article_info in articles_info:
            # Match the issue either exactly or with a leading zero
            # ('9/2007' in metadata vs '09/2007' requested)
            if article_info.get('n', '') == issue_number or \
                   '0' + article_info.get('n', '') == issue_number:
                # 773__c carries the position of the article in the issue
                if article_info.has_key('c') and \
                       article_info['c'].isdigit():
                    order_number = int(article_info.get('c', ''))
                    if (newest_first or newest_only) and \
                           is_new_article(journal_name, issue_number, record):
                        if ordered_new_records.has_key(order_number):
                            ordered_new_records[order_number].append(record)
                        else:
                            ordered_new_records[order_number] = [record]
                    elif not newest_only:
                        if ordered_records.has_key(order_number):
                            ordered_records[order_number].append(record)
                        else:
                            ordered_records[order_number] = [record]
                else:
                    # No order? No problem! Append it at the end.
                    if newest_first and is_new_article(journal_name, issue_number, record):
                        new_records_without_defined_order.append(record)
                    elif not newest_only:
                        records_without_defined_order.append(record)
    # Append records without order at the end of the list
    if records_without_defined_order:
        if ordered_records:
            ordered_records[max(ordered_records.keys()) + 1] = records_without_defined_order
        else:
            ordered_records[1] = records_without_defined_order
    # Append new records without order at the end of the list of new
    # records
    if new_records_without_defined_order:
        if ordered_new_records:
            ordered_new_records[max(ordered_new_records.keys()) + 1] = new_records_without_defined_order
        else:
            ordered_new_records[1] = new_records_without_defined_order
    # Append new records at the beginning of the list of 'old'
    # records. To do so, use negative integers
    if ordered_new_records:
        highest_new_record_order = max(ordered_new_records.keys())
        for order, new_records in ordered_new_records.iteritems():
            ordered_records[- highest_new_record_order + order - 1] = new_records
    for (order, records) in ordered_records.iteritems():
        # Reverse so that if there are several articles at same
        # positon, newest appear first
        records.reverse()
    return ordered_records
def get_journal_articles(journal_name, issue, category,
                         newest_first=False, newest_only=False):
    """
    Returns the recids in given category and journal, for given issue
    number. The returned recids are grouped according to their 773__c
    field.

    Example of returned value:
             {'1': [2390, 2386, 2385],
              '3': [2388],
              '2': [2389],
              '4': [2387]}

    Parameters:

       journal_name - *str* the name of the journal (as used in URLs)
              issue - *str* the issue. Eg: "08/2007"
           category - *str* the name of the category
       newest_first - *bool* if True, new articles should be placed
                      at beginning of the list. If so, their
                      position/order will be negative integers
        newest_only - *bool* if only new articles should be returned
    """
    use_cache = True
    current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
    if issue_is_later_than(issue, current_issue):
        # If we are working on unreleased issue, do not use caching
        # mechanism
        use_cache = False
    if use_cache:
        cached_articles = _get_cached_journal_articles(journal_name, issue, category)
        if cached_articles is not None:
            # Cache hit: only the ordering still needs to be computed
            ordered_articles = get_order_dict_from_recid_list(cached_articles,
                                                              journal_name,
                                                              issue,
                                                              newest_first,
                                                              newest_only)
            return ordered_articles
    # Retrieve the list of rules that map Category -> Search Pattern.
    # Keep only the rule matching our category
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    category_to_search_pattern_rules = config_strings["record/rule"]
    try:
        matching_rule = [rule.split(',', 1) for rule in \
                         category_to_search_pattern_rules \
                         if rule.split(',')[0] == category]
    except:
        return []
    # Intersect: records of this issue AND records matching the category
    recids_issue = search_pattern(p='773__n:%s -980:DELETED' % issue)
    recids_rule = search_pattern(p=matching_rule[0][1])
    if issue[0] == '0':
        # search for 09/ and 9/
        recids_issue.union_update(search_pattern(p='773__n:%s -980:DELETED' % issue.lstrip('0')))
    recids_rule.intersection_update(recids_issue)
    # Keep only records that actually exist (record_exists() == 1)
    recids = [recid for recid in recids_rule if record_exists(recid) == 1]
    if use_cache:
        _cache_journal_articles(journal_name, issue, category, recids)
    ordered_articles = get_order_dict_from_recid_list(recids,
                                                      journal_name,
                                                      issue,
                                                      newest_first,
                                                      newest_only)
    return ordered_articles
def _cache_journal_articles(journal_name, issue, category, articles):
    """
    Caches given articles IDs.

    Stores them under journal_info['journal_articles'][category] in the
    pickled per-issue cache file. Returns True on success, False when the
    cache directory cannot be created.
    """
    journal_cache_path = get_journal_article_cache_path(journal_name,
                                                       issue)
    # Load the existing cache (if any); start from scratch on any
    # read/unpickling problem
    try:
        journal_cache_file = open(journal_cache_path, 'r')
        journal_info = cPickle.load(journal_cache_file)
        journal_cache_file.close()
    except cPickle.PickleError, e:
        journal_info = {}
    except IOError:
        journal_info = {}
    except EOFError:
        journal_info = {}
    except ValueError:
        journal_info = {}
    if not journal_info.has_key('journal_articles'):
        journal_info['journal_articles'] = {}
    journal_info['journal_articles'][category] = articles
    # Create cache directory if it does not exist
    journal_cache_dir = os.path.dirname(journal_cache_path)
    if not os.path.exists(journal_cache_dir):
        try:
            os.makedirs(journal_cache_dir)
        except:
            return False
    journal_cache_file = open(journal_cache_path, 'w')
    cPickle.dump(journal_info, journal_cache_file)
    journal_cache_file.close()
    return True
def _get_cached_journal_articles(journal_name, issue, category):
    """
    Retrieve the articles IDs cached for this journal.

    Returns None if cache does not exist or more than 5 minutes old
    """
    # Check if our cache is more or less up-to-date (not more than 5
    # minutes old)
    try:
        journal_cache_path = get_journal_article_cache_path(journal_name,
                                                           issue)
        last_update = os.path.getctime(journal_cache_path)
    except Exception, e :
        return None
    now = time.time()
    if (last_update + 5*60) < now:
        # Cache expired
        return None
    # Get from cache; any read/unpickling problem is treated as a miss
    try:
        journal_cache_file = open(journal_cache_path, 'r')
        journal_info = cPickle.load(journal_cache_file)
        journal_articles = journal_info.get('journal_articles', {}).get(category, None)
        journal_cache_file.close()
    except cPickle.PickleError, e:
        journal_articles = None
    except IOError:
        journal_articles = None
    except EOFError:
        journal_articles = None
    except ValueError:
        journal_articles = None
    return journal_articles
def is_new_article(journal_name, issue, recid):
    """
    Check if given article should be considered as new or not.

    New articles are articles that have never appeared in older issues
    than given one ('issue' has the form 'number/year').
    """
    for pub_info in BibFormatObject(recid).fields('773__'):
        ref_number, ref_year = issue.split('/')
        pub_number, pub_year = pub_info.get('n', '/').split('/', 1)
        if int(pub_year) < int(ref_year):
            # Appeared in an earlier year: not new
            return False
        if int(pub_year) == int(ref_year) and \
           int(pub_number) < int(ref_number):
            # Same year, earlier issue: not new
            return False
    return True
############################ CATEGORIES RELATED ######################
def get_journal_categories(journal_name, issue=None):
    """
    List the categories for the given journal and issue.

    Returns categories in same order as in config file.

    Parameters:

        journal_name - *str* the name of the journal (as used in URLs)
               issue - *str* the issue. Eg:'08/2007'. If None, consider
                       all categories defined in journal config
    """
    # NOTE(review): the result is unused, but get_current_issue() may
    # initialize the first issue in the DB — call kept for that side effect
    current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    all_categories = [rule.split(',')[0] for rule in
                      config_strings["record/rule"]]
    if issue is None:
        return all_categories
    # Keep only the categories with at least one article in this issue
    return [category for category in all_categories
            if len(get_journal_articles(journal_name,
                                        issue,
                                        category).keys()) > 0]
def get_category_query(journal_name, category):
    """
    Returns the category definition for the given category and journal name

    Parameters:

        journal_name - *str* the name of the journal (as used in URLs)
             categoy - *str* a category name, as found in the XML config
    """
    rules = get_xml_from_config(["record/rule"], journal_name)["record/rule"]
    try:
        matches = [rule.split(',', 1)[1].strip() for rule in rules
                   if rule.split(',')[0] == category]
    except:
        return None
    # NOTE(review): as in the original, an unknown category raises
    # IndexError here rather than returning None
    return matches[0]
######################### JOURNAL CONFIG VARS ######################
# journal_name -> parsed minidom document, filled lazily below
cached_parsed_xml_config = {}
def get_xml_from_config(nodes, journal_name):
    """
    Returns values from the journal configuration file.

    The needed values can be specified by node name, or by a hierarchy
    of nodes names using '/' as character to mean 'descendant of'.
    Eg. 'record/rule' to get all the values of 'rule' tags inside the
    'record' node

    Returns a dictionary with a key for each query and a list of
    strings (innerXml) results for each key.

    Has a special field "config_fetching_error" that returns an error when
    something has gone wrong.
    """
    # Get and open the config file
    results = {}
    if cached_parsed_xml_config.has_key(journal_name):
        config_file = cached_parsed_xml_config[journal_name]
    else:
        config_path = '%s/webjournal/%s/%s-config.xml' % \
                      (CFG_ETCDIR, journal_name, journal_name)
        config_file = minidom.Document
        try:
            config_file = minidom.parse("%s" % config_path)
        except:
            # todo: raise exception "error: no config file found"
            results["config_fetching_error"] = "could not find config file"
            return results
        else:
            cached_parsed_xml_config[journal_name] = config_file
    for node_path in nodes:
        # Walk down the path one component at a time, starting from the
        # document root
        node = config_file
        for node_path_component in node_path.split('/'):
            # pylint: disable=E1103
            # The node variable can be rewritten in the loop and therefore
            # its type can change.
            if node != config_file and node.length > 0:
                # We have a NodeList object: consider only first child
                node = node.item(0)
            # pylint: enable=E1103
            try:
                node = node.getElementsByTagName(node_path_component)
            except:
                # WARNING, config did not have such value
                node = []
                break
        results[node_path] = []
        for result in node:
            try:
                result_string = result.firstChild.toxml(encoding="utf-8")
            except:
                # WARNING, config did not have such value
                continue
            results[node_path].append(result_string)
    return results
def get_journal_issue_field(journal_name):
    """
    Returns the MARC field in which this journal expects to find
    the issue number, as read from the journal config file
    (default: '773__n').

    Parameters:

        journal_name - *str* the name of the journal (as used in URLs)
    """
    config_strings = get_xml_from_config(["issue_number"], journal_name)
    try:
        return config_strings["issue_number"][0]
    except:
        return '773__n'
def get_journal_css_url(journal_name, type='screen'):
    """
    Returns URL to this journal's CSS.

    Parameters:

        journal_name - *str* the name of the journal (as used in URLs)
                type - *str* 'screen' or 'print', depending on the kind
                       of CSS
    """
    config_strings = get_xml_from_config([type], journal_name)
    css_path = ''
    try:
        # BUGFIX: the original always read the hard-coded 'screen' key,
        # so type='print' could never return the print stylesheet
        css_path = config_strings[type][0]
    except Exception:
        register_exception(req=None,
                           suffix="No css file for journal %s. Is this right?" % \
                           journal_name)
    return CFG_SITE_URL + '/' + css_path
def get_journal_submission_params(journal_name):
    """
    Returns the (doctype, identifier element, identifier field) for
    the submission of articles in this journal, so that it is possible
    to build direct submission links.

    Parameter:

        journal_name - *str* the name of the journal (as used in URLs)
    """
    def _first_value(node, default=''):
        # One-shot config lookup with a fallback value
        values = get_xml_from_config([node], journal_name)
        if values.get(node, ''):
            return values[node][0]
        return default
    doctype = _first_value('submission/doctype')
    identifier_element = _first_value('submission/identifier_element')
    identifier_field = _first_value('submission/identifier_field', '037__a')
    return (doctype, identifier_element, identifier_field)
def get_journal_draft_keyword_to_remove(journal_name):
    """
    Returns the keyword that should be removed from the article
    metadata in order to move the article from Draft to Ready
    ('' when not configured).
    """
    values = get_xml_from_config(["draft_keyword"], journal_name)
    if not values.get('draft_keyword', ''):
        return ''
    return values['draft_keyword'][0]
def get_journal_alert_sender_email(journal_name):
    """
    Returns the email address that should be used as sender of the alert
    email.

    If not specified, use CFG_SITE_SUPPORT_EMAIL
    """
    values = get_xml_from_config(["alert_sender"], journal_name)
    configured = values.get('alert_sender', '')
    return configured[0] if configured else CFG_SITE_SUPPORT_EMAIL
def get_journal_alert_recipient_email(journal_name):
    """
    Returns the default email address of the recipients of the email
    Return a string of comma-separated emails.
    """
    if CFG_DEVEL_SITE:
        # To be on the safe side, do not return the default alert recipients.
        return ''
    values = get_xml_from_config(["alert_recipients"], journal_name)
    configured = values.get('alert_recipients', '')
    return configured[0] if configured else ''
def get_journal_collection_to_refresh_on_release(journal_name):
    """
    Returns the list of collection to update (WebColl) upon release of
    an issue. Collections unknown to the search engine are filtered out.
    """
    from invenio.search_engine import collection_reclist_cache
    values = get_xml_from_config(["update_on_release/collection"], journal_name)
    candidates = values.get('update_on_release/collection', [])
    return [coll for coll in candidates
            if collection_reclist_cache.cache.has_key(coll)]
def get_journal_index_to_refresh_on_release(journal_name):
    """
    Returns the list of indexes to update (BibIndex) upon release of
    an issue. Unknown index names are filtered out.
    """
    from invenio.bibindex_engine_utils import get_index_id_from_index_name
    values = get_xml_from_config(["update_on_release/index"], journal_name)
    candidates = values.get('update_on_release/index', [])
    return [idx for idx in candidates
            if get_index_id_from_index_name(idx) != '']
def get_journal_template(template, journal_name, ln=CFG_SITE_LANG):
    """
    Returns the journal templates name for the given template type

    Raise an exception if template cannot be found.
    """
    from invenio.webjournal_config import \
         InvenioWebJournalTemplateNotFoundError
    values = get_xml_from_config([template], journal_name)
    try:
        return 'webjournal' + os.sep + values[template][0]
    except:
        # Missing key or empty list: report the template as not found
        raise InvenioWebJournalTemplateNotFoundError(ln,
                                                     journal_name,
                                                     template)
def get_journal_name_intl(journal_name, ln=CFG_SITE_LANG):
    """
    Returns the nice name of the journal, translated if possible
    ('' when not configured).
    """
    _ = gettext_set_language(ln)
    values = get_xml_from_config(["niceName"], journal_name)
    if not values.get('niceName', ''):
        return ''
    return _(values['niceName'][0])
def get_journal_languages(journal_name):
    """
    Returns the list of languages defined for this journal
    (empty list when not configured).
    """
    values = get_xml_from_config(["languages"], journal_name)
    if not values.get('languages', ''):
        return []
    return [code.strip() for code in values['languages'][0].split(',')]
def get_journal_issue_grouping(journal_name):
    """
    Returns the number of issue that are typically released at the
    same time.

    This is used if every two weeks you release an issue that should
    contains issue of next 2 weeks (eg. at week 16, you relase an
    issue named '16-17/2009')

    This number should help in the admin interface to guess how to
    release the next issue (can be overidden by user). Defaults to 1.
    """
    values = get_xml_from_config(["issue_grouping"], journal_name)
    if values.get('issue_grouping', ''):
        raw_value = values['issue_grouping'][0]
        if raw_value.isdigit() and int(raw_value) > 0:
            return int(raw_value)
    return 1
def get_journal_nb_issues_per_year(journal_name):
    """
    Returns the default number of issues per year for this journal.

    This number should help in the admin interface to guess the next
    issue number (can be overidden by user). Defaults to 52.
    """
    values = get_xml_from_config(["issues_per_year"], journal_name)
    if values.get('issues_per_year', ''):
        raw_value = values['issues_per_year'][0]
        if raw_value.isdigit() and int(raw_value) > 0:
            return int(raw_value)
    return 52
def get_journal_preferred_language(journal_name, ln):
    """
    Returns the most adequate language to display the journal, given a
    language.

    Preference order: 'ln' if the journal supports it, then the site
    language, then the journal's first configured language, and finally
    the site language as last resort.
    """
    languages = get_journal_languages(journal_name)
    if ln in languages:
        return ln
    elif CFG_SITE_LANG in languages:
        return CFG_SITE_LANG
    elif languages:
        # BUGFIX: return the first supported language (a string); the
        # original returned the whole list here, although every other
        # branch returns a single language code.
        return languages[0]
    else:
        return CFG_SITE_LANG
def get_unreleased_issue_hiding_mode(journal_name):
    """
    Returns how unreleased issue should be treated. Can be one of the
    following string values:

           'future' - only future unreleased issues are hidden. Past
                      unreleased one can be viewed

              'all' - any unreleased issue (past and future) have to be
                      hidden

            'none' - no unreleased issue is hidden

    Defaults to 'all' when missing or invalid in the config.
    """
    values = get_xml_from_config(["hide_unreleased_issues"], journal_name)
    if values.get('hide_unreleased_issues', ''):
        mode = values['hide_unreleased_issues'][0]
        if mode in ['future', 'all', 'none']:
            return mode
    return 'all'
def get_first_issue_from_config(journal_name):
    """
    Returns the first issue as defined from config. This should only
    be useful when no issue have been released.

    If not specified, returns the issue made of current week number
    and year.
    """
    values = get_xml_from_config(["first_issue"], journal_name)
    if 'first_issue' in values:
        return values['first_issue'][0]
    return time.strftime("%W/%Y", time.localtime())
######################## TIME / ISSUE FUNCTIONS ######################
def get_current_issue(ln, journal_name):
    """
    Returns the current issue of a journal as a string.

    Current issue is the latest released issue.

    Side effect: if the journal has no released issue yet, the first
    issue from the config is inserted into jrnISSUE and returned.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        current_issue = run_sql("""SELECT issue_number
FROM jrnISSUE
WHERE date_released <= NOW()
AND id_jrnJOURNAL=%s
ORDER BY date_released DESC
LIMIT 1""",
                                (journal_id,))[0][0]
    except:
        # start the first journal ever
        current_issue = get_first_issue_from_config(journal_name)
        run_sql("""INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display)
VALUES(%s, %s, %s)""",
                (journal_id,
                 current_issue,
                 current_issue))
    return current_issue
def get_all_released_issues(journal_name):
    """
    Returns the list of released issue, ordered by release date

    Note that it only includes the issues that are considered as
    released in the DB: it will not for example include articles that
    have been imported in the system but not been released
    """
    journal_id = get_journal_id(journal_name)
    rows = run_sql("""SELECT issue_number
FROM jrnISSUE
WHERE id_jrnJOURNAL = %s
AND UNIX_TIMESTAMP(date_released) != 0
ORDER BY date_released DESC""",
                   (journal_id,))
    return [row[0] for row in rows] if rows else []
def get_next_journal_issues(current_issue_number, journal_name, n=2):
    """
    Suggest the 'n' issue numbers following the given one, wrapping the
    issue counter around the yearly issue count and incrementing the
    year accordingly.
    """
    number, year = current_issue_number.split('/', 1)
    number = int(number)
    year = int(year)
    issues_per_year = get_journal_nb_issues_per_year(journal_name)
    next_issues = []
    for offset in range(1, n + 1):
        next_number = ((number - 1 + offset) % issues_per_year) + 1
        # Integer division: how many year boundaries were crossed
        next_year = year + ((number - 1 + offset) // issues_per_year)
        next_issues.append(make_issue_number(journal_name, next_number, next_year))
    return next_issues
def get_grouped_issues(journal_name, issue_number):
    """
    Return all the issues sharing the same display number as the given
    issue, sorted from the oldest to the newest one.  Empty list if
    none found.
    """
    journal_id = get_journal_id(journal_name, CFG_SITE_LANG)
    issue_display = get_issue_number_display(issue_number, journal_name)
    rows = run_sql("""SELECT issue_number
                        FROM jrnISSUE
                       WHERE id_jrnJOURNAL=%s AND issue_display=%s""",
                   (journal_id, issue_display))
    grouped_issues = [row[0] for row in rows]
    # Oldest first (sorting an empty list is a harmless no-op)
    grouped_issues.sort(compare_issues)
    return grouped_issues
def compare_issues(issue1, issue2):
    """
    Comparison function for issues of the form "NN/YYYY".

    Issues are ordered first by year, then by issue number.

    @return: -1 if issue1 is older than issue2,
              0 if the issues are equal,
              1 if issue1 is newer than issue2
    """
    issue1_number, issue1_year = issue1.split('/', 1)
    issue2_number, issue2_year = issue2.split('/', 1)
    # Compare (year, number) tuples lexicographically.  The explicit
    # subtraction idiom replaces the cmp() builtin, which no longer
    # exists in Python 3, while returning the same -1/0/1 values.
    key1 = (int(issue1_year), int(issue1_number))
    key2 = (int(issue2_year), int(issue2_number))
    return (key1 > key2) - (key1 < key2)
def issue_is_later_than(issue1, issue2):
    """
    Return True if issue1 ("NN/YYYY") was published later than issue2.
    """
    number1, year1 = issue1.split('/', 1)
    number2, year2 = issue2.split('/', 1)
    # Lexicographic comparison of (year, number) pairs: later year
    # wins; for equal years the issue number decides.
    return (int(year1), int(number1)) > (int(year2), int(number2))
def get_issue_number_display(issue_number, journal_name,
                             ln=CFG_SITE_LANG):
    """
    Return the display string for the given issue number, or the raw
    issue number itself when the issue has not yet been released
    (i.e. has no row in the DB).
    """
    journal_id = get_journal_id(journal_name, ln)
    rows = run_sql("""SELECT issue_display
                        FROM jrnISSUE
                       WHERE issue_number=%s
                         AND id_jrnJOURNAL=%s""",
                   (issue_number, journal_id))
    if not rows:
        # Not yet released: no display string recorded
        return issue_number
    return rows[0][0]
def make_issue_number(journal_name, number, year, for_url_p=False):
    """
    Build a normalized issue number string from the given issue number
    and year (each given as int or str).  The issue number is
    zero-padded to the width of the journal's yearly issue count.
    Year and number are swapped when for_url_p is True.
    """
    issues_per_year = get_journal_nb_issues_per_year(journal_name)
    width = len(str(issues_per_year))
    number = int(str(number))
    year = int(str(year))
    # '*' lets the %-formatting take the zero-pad width as an argument
    padded_number = '%0*i' % (width, number)
    if for_url_p:
        return '%i/%s' % (year, padded_number)
    return '%s/%i' % (padded_number, year)
def get_release_datetime(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Return the datetime at which the given issue was released (from the
    DB), or None when the issue has not yet been released.
    See issue_to_datetime() for the *theoretical* release time of an
    issue.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        rows = run_sql("""SELECT date_released
                            FROM jrnISSUE
                           WHERE issue_number=%s
                             AND id_jrnJOURNAL=%s""",
                       (issue, journal_id))
        release_date = rows[0][0]
    except:
        # No row for this issue (or DB error): treat as unreleased
        return None
    # A NULL/empty release date also means "not released"
    return release_date or None
def get_announcement_datetime(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Return the datetime at which the given issue was announced through
    the alert system, or None when it was never announced.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        rows = run_sql("""SELECT date_announced
                            FROM jrnISSUE
                           WHERE issue_number=%s
                             AND id_jrnJOURNAL=%s""",
                       (issue, journal_id))
        announce_date = rows[0][0]
    except:
        # No row for this issue (or DB error): treat as not announced
        return None
    # A NULL/empty announcement date also means "not announced"
    return announce_date or None
def datetime_to_issue(issue_datetime, journal_name):
    """
    Returns the issue corresponding to the given datetime object.
    If issue_datetime is too far in the future or in the past, gives
    the best possible matching issue, or None, if it does not seem to
    exist.
    #If issue_datetime is too far in the future, return the latest
    #released issue.
    #If issue_datetime is too far in the past, return None
    Parameters:
    issue_datetime - *datetime* date of the issue to be retrieved
    journal_name - *str* the name of the journal (as used in URLs)
    """
    issue_number = None
    journal_id = get_journal_id(journal_name)
    # Try to discover how much days an issue is valid
    nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
    this_year_number_of_days = 365
    if calendar.isleap(issue_datetime.year):
        this_year_number_of_days = 366
    # Average lifetime (in days) of one issue, rounded up
    issue_day_lifetime = math.ceil(float(this_year_number_of_days)/nb_issues_per_year)
    # Latest issue released strictly before the given date, if any
    res = run_sql("""SELECT issue_number, date_released
                       FROM jrnISSUE
                      WHERE date_released < %s
                        AND id_jrnJOURNAL = %s
                   ORDER BY date_released DESC LIMIT 1""",
                  (issue_datetime, journal_id))
    if res and res[0][1]:
        issue_number = res[0][0]
        issue_release_date = res[0][1]
        # Check that the result is not too far in the future:
        if issue_release_date + datetime.timedelta(issue_day_lifetime) < issue_datetime:
            # In principle, the latest issue will no longer be valid
            # at that time
            return None
    else:
        # Mmh, are we too far in the past? This can happen in the case
        # of articles that have been imported in the system but never
        # considered as 'released' in the database. So we should still
        # try to approximate/match an issue:
        if round(issue_day_lifetime) in [6, 7, 8]:
            # Weekly issues. We can use this information to better
            # match the issue number
            issue_nb = int(issue_datetime.strftime('%W')) # = week number
        else:
            # Compute the number of days since beginning of year, and
            # divide by the lifetime of an issue: we get the
            # approximate issue_number
            issue_nb = math.ceil((int(issue_datetime.strftime('%j')) / issue_day_lifetime))
        # Zero-pad the issue number to the width of the yearly issue count
        issue_number = ("%0" + str(len(str(nb_issues_per_year)))+ "i/%i") % (issue_nb, issue_datetime.year)
        # Now check if this issue exists in the system for this
        # journal
        if not get_journal_categories(journal_name, issue_number):
            # This issue did not exist
            return None
    return issue_number
# Granularity constants used by issue_to_datetime(): how often the
# journal is (approximately) published.
DAILY = 1
WEEKLY = 2
MONTHLY = 3
def issue_to_datetime(issue_number, journal_name, granularity=None):
    """
    Returns the *theoretical* date of release for given issue: useful
    if you release on Friday, but the issue date of the journal
    should correspond to the next Monday.
    This will correspond to the next day/week/month, depending on the
    number of issues per year (or the 'granularity' if specified) and
    the release time (if close to the end of a period defined by the
    granularity, consider next period since release is made a bit in
    advance).
    See get_release_datetime() for the *real* release time of an issue
    THIS FUNCTION SHOULD ONLY BE USED FOR INFORMATIVE DISPLAY PURPOSE,
    AS IT GIVES APPROXIMATIVE RESULTS. Do not use it to make decisions.
    Parameters:
    issue_number - *str* issue number to consider
    journal_name - *str* the name of the journal (as used in URLs)
    granularity - *int* the granularity to consider (DAILY, WEEKLY or
                  MONTHLY module-level constants)
    """
    # If we have released, we can use this information. Otherwise we
    # have to approximate.
    issue_date = get_release_datetime(issue_number, journal_name)
    if not issue_date:
        # Approximate release date
        number, year = issue_number.split('/')
        number = int(number)
        year = int(year)
        nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
        this_year_number_of_days = 365
        if calendar.isleap(year):
            this_year_number_of_days = 366
        # Average lifetime (in days) of one issue
        issue_day_lifetime = float(this_year_number_of_days)/nb_issues_per_year
        # Compute from beginning of the year
        issue_date = datetime.datetime(year, 1, 1) + \
                     datetime.timedelta(days=int(round((number - 1) * issue_day_lifetime)))
        # Okay, but if last release is not too far in the past, better
        # compute from the release.
        current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
        current_issue_time = get_release_datetime(current_issue, journal_name)
        # NOTE(review): current_issue_time could be None when the
        # current issue was bootstrapped without a release date --
        # confirm this cannot happen here, otherwise '.year' below
        # would raise AttributeError.
        if current_issue_time.year == issue_date.year:
            current_issue_number, current_issue_year = current_issue.split('/')
            current_issue_number = int(current_issue_number)
            # Compute from last release
            issue_date = current_issue_time + \
                         datetime.timedelta(days=int((number - current_issue_number) * issue_day_lifetime))
    # If granularity is not specifed, deduce from config
    if granularity is None:
        nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
        if nb_issues_per_year > 250:
            granularity = DAILY
        elif nb_issues_per_year > 40:
            granularity = WEEKLY
        else:
            granularity = MONTHLY
    # Now we can adapt the date to match the granularity
    if granularity == DAILY:
        if issue_date.hour >= 15:
            # If released after 3pm, consider it is the issue of the next
            # day
            issue_date = issue_date + datetime.timedelta(days=1)
    elif granularity == WEEKLY:
        (year, week_nb, day_nb) = issue_date.isocalendar()
        if day_nb > 4:
            # If released on Fri, Sat or Sun, consider that it is next
            # week's issue.
            issue_date = issue_date + datetime.timedelta(weeks=1)
        # Get first day of the week
        issue_date = issue_date - datetime.timedelta(days=issue_date.weekday())
    else:
        if issue_date.day > 22:
            # If released last week of the month, consider release for
            # next month
            issue_date = issue_date.replace(month=issue_date.month+1)
        # Normalize to the first day of the month at midnight
        date_string = issue_date.strftime("%Y %m 1")
        issue_date = datetime.datetime(*(time.strptime(date_string, "%Y %m %d")[0:6]))
    return issue_date
def get_number_of_articles_for_issue(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Return a dictionary mapping each category of the given issue to the
    number of articles it contains.
    """
    categories = get_journal_categories(journal_name, issue)
    return dict((category,
                 len(get_journal_articles(journal_name, issue, category)))
                for category in categories)
########################## JOURNAL RELATED ###########################
def get_journal_info_path(journal_name):
    """
    Return the path to the info file of the given journal, or None if
    the path cannot be determined.  The info file is used to get
    information about a journal when the database is not available.
    """
    candidate = os.path.abspath("%s/webjournal/%s/info.dat" % \
                                (CFG_CACHEDIR, journal_name))
    # Refuse any path that escapes the webjournal cache directory
    # (e.g. through '..' components in journal_name)
    if not candidate.startswith(CFG_CACHEDIR + '/webjournal/'):
        return None
    return candidate
def get_journal_article_cache_path(journal_name, issue):
    """
    Return the path to the cache file of the articles of the given
    issue, or None if the path cannot be determined.
    """
    # "NN/YYYY" (or already-normalized "NN_YYYY") -> number, year
    issue_number, year = issue.replace('/', '_').split('_', 1)
    candidate = os.path.abspath("%s/webjournal/%s/%s/%s/articles_cache.dat" % \
                                (CFG_CACHEDIR, journal_name,
                                 year, issue_number))
    # Refuse any path that escapes the webjournal cache directory
    if not candidate.startswith(CFG_CACHEDIR + '/webjournal/'):
        return None
    return candidate
def get_journal_id(journal_name, ln=CFG_SITE_LANG):
    """
    Get the id for this journal from the DB. If DB is down, try to get
    from cache.

    @param journal_name: name of the journal (as used in URLs)
    @param ln: language, forwarded to the raised exception
    @return: the journal id as stored in the jrnJOURNAL table
    @raise InvenioWebJournalJournalIdNotFoundDBError: if the id could
           be found neither in the DB nor in the on-disk cache
    """
    journal_id = None
    from invenio.webjournal_config import InvenioWebJournalJournalIdNotFoundDBError
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # do not connect to the database as the site is closed for
        # maintenance:
        journal_info_path = get_journal_info_path(journal_name)
        try:
            journal_info_file = open(journal_info_path, 'r')
            journal_info = cPickle.load(journal_info_file)
            journal_id = journal_info.get('journal_id', None)
        # Any unreadable/corrupted cache file simply means "id unknown"
        except cPickle.PickleError, e:
            journal_id = None
        except IOError:
            journal_id = None
        except ValueError:
            journal_id = None
    else:
        try:
            res = run_sql("SELECT id FROM jrnJOURNAL WHERE name=%s",
                          (journal_name,))
            if len(res) > 0:
                journal_id = res[0][0]
        except OperationalError, e:
            # Cannot connect to database. Try to read from cache
            journal_info_path = get_journal_info_path(journal_name)
            try:
                journal_info_file = open(journal_info_path, 'r')
                journal_info = cPickle.load(journal_info_file)
                journal_id = journal_info['journal_id']
            except cPickle.PickleError, e:
                journal_id = None
            except IOError:
                journal_id = None
            except ValueError:
                journal_id = None
    if journal_id is None:
        raise InvenioWebJournalJournalIdNotFoundDBError(ln, journal_name)
    return journal_id
def guess_journal_name(ln, journal_name=None):
    """
    Try to guess which journal the user was looking for when no journal
    name was provided, or when the given name does not match the case
    of the original journal name.

    Raises InvenioWebJournalNoJournalOnServerError when no journal
    exists at all, and InvenioWebJournalNoNameError when the given
    name matches no known journal.
    """
    from invenio.webjournal_config import InvenioWebJournalNoJournalOnServerError
    from invenio.webjournal_config import InvenioWebJournalNoNameError
    journals = get_journals_ids_and_names()
    if not journals:
        raise InvenioWebJournalNoJournalOnServerError(ln)
    if not journal_name and 'journal_name' in journals[0]:
        # No name given: default to the first journal on the server
        return journals[0]['journal_name']
    # Case-insensitive match against the known journal names
    matches = [journal['journal_name'] for journal in journals
               if journal.get('journal_name', '').lower() == journal_name.lower()]
    if matches:
        return matches[0]
    raise InvenioWebJournalNoNameError(ln)
def _load_journals_from_cache():
    """
    Read the list of journals from the per-journal 'info.dat' cache
    files on disk.  Helper for get_journals_ids_and_names(), used when
    the database cannot (or must not) be accessed.
    Unreadable or corrupted cache files are silently skipped.
    """
    journals = []
    cache_dir = "%s/webjournal" % CFG_CACHEDIR
    for name in os.listdir(cache_dir):
        # BUGFIX: os.listdir() returns bare names; they must be
        # resolved against the cache directory, not the process CWD
        # as the previous code did.
        journal_dir = os.path.join(cache_dir, name)
        info_path = os.path.join(journal_dir, 'info.dat')
        if not (os.path.isdir(journal_dir) and os.path.exists(info_path)):
            continue
        journal_info_file = None
        try:
            try:
                journal_info_file = open(info_path, 'r')
                journal_info = cPickle.load(journal_info_file)
            except (cPickle.PickleError, IOError, ValueError):
                # Well, can't do anything with this cache file...
                continue
        finally:
            # Always release the file handle (was leaked before)
            if journal_info_file is not None:
                journal_info_file.close()
        journal_id = journal_info.get('journal_id', None)
        journal_name = journal_info.get('journal_name', None)
        if journal_id is not None and journal_name is not None:
            journals.append({'journal_id': journal_id,
                             'journal_name': journal_name,
                             'current_issue': journal_info.get('current_issue', None)})
    return journals

def get_journals_ids_and_names():
    """
    Returns the list of existing journals as dicts with keys
    'journal_id' and 'journal_name' (plus 'current_issue' when read
    from the on-disk cache).  Try to read from the DB, or from cache
    if the DB is not accessible or the site is closed for maintenance.
    """
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # do not connect to the database as the site is closed for
        # maintenance:
        return _load_journals_from_cache()
    try:
        res = run_sql("SELECT id, name FROM jrnJOURNAL ORDER BY id")
    except OperationalError:
        # Cannot connect to database. Try to read from cache
        return _load_journals_from_cache()
    return [{'journal_id': journal_id, 'journal_name': journal_name}
            for journal_id, journal_name in res]
def parse_url_string(uri):
    """
    Centralized function to parse any url string given in
    webjournal. Useful to retrieve current category, journal,
    etc. from within format elements
    The webjournal interface handler should already have cleaned the
    URI beforehand, so that journal name exist, issue number is
    correct, etc. The only remaining problem might be due to the
    capitalization of journal name in contact, search and popup pages,
    so clean the journal name. Note that language is also as returned
    from the URL, which might need to be filtered to match available
    languages (WebJournal elements can rely in bfo.lang to retrieve
    washed language)
    returns:
    args: all arguments in dict form
    """
    # Default values for every recognized argument
    args = {'journal_name' : '',
            'issue_year' : '',
            'issue_number' : None,
            'issue' : None,
            'category' : '',
            'recid' : -1,
            'verbose' : 0,
            'ln' : CFG_SITE_LANG,
            'archive_year' : None,
            'archive_search': ''}
    if not uri.startswith('/journal'):
        # Mmh, incorrect context. Still, keep language if available
        url_params = urlparse(uri)[4]
        args['ln'] = dict([part.split('=') for part in url_params.split('&') \
                           if len(part.split('=')) == 2]).get('ln', CFG_SITE_LANG)
        return args
    # Take everything after journal and before first question mark
    splitted_uri = uri.split('journal', 1)
    second_part = splitted_uri[1]
    splitted_uri = second_part.split('?')
    uri_middle_part = splitted_uri[0]
    uri_arguments = ''
    if len(splitted_uri) > 1:
        uri_arguments = splitted_uri[1]
    # First parse the query-string arguments (ln, verbose, archive_*,
    # name), then the path components (journal/year/number/category/recid)
    arg_list = uri_arguments.split("&")
    args['ln'] = CFG_SITE_LANG
    args['verbose'] = 0
    for arg_pair in arg_list:
        arg_and_value = arg_pair.split('=')
        if len(arg_and_value) == 2:
            if arg_and_value[0] == 'ln':
                args['ln'] = arg_and_value[1]
            elif arg_and_value[0] == 'verbose' and \
                     arg_and_value[1].isdigit():
                args['verbose'] = int(arg_and_value[1])
            elif arg_and_value[0] == 'archive_year' and \
                     arg_and_value[1].isdigit():
                args['archive_year'] = int(arg_and_value[1])
            elif arg_and_value[0] == 'archive_search':
                args['archive_search'] = arg_and_value[1]
            elif arg_and_value[0] == 'name':
                args['journal_name'] = guess_journal_name(args['ln'],
                                                          arg_and_value[1])
    arg_list = uri_middle_part.split("/")
    if len(arg_list) > 1 and arg_list[1] not in ['search', 'contact', 'popup']:
        args['journal_name'] = urllib.unquote(arg_list[1])
    # NOTE(review): when len(arg_list) <= 1 (e.g. uri == '/journal'),
    # the following elif raises IndexError on arg_list[1] -- presumably
    # the interface handler guarantees a trailing slash; confirm.
    elif arg_list[1] not in ['search', 'contact', 'popup']:
        args['journal_name'] = guess_journal_name(args['ln'],
                                                  args['journal_name'])
    cur_issue = get_current_issue(args['ln'], args['journal_name'])
    if len(arg_list) > 2:
        try:
            args['issue_year'] = int(urllib.unquote(arg_list[2]))
        except:
            # Unparsable year in URL: fall back to the current issue's
            args['issue_year'] = int(cur_issue.split('/')[1])
    else:
        # No issue in URL: use the current issue of the journal
        args['issue'] = cur_issue
        args['issue_year'] = int(cur_issue.split('/')[1])
        args['issue_number'] = int(cur_issue.split('/')[0])
    if len(arg_list) > 3:
        try:
            args['issue_number'] = int(urllib.unquote(arg_list[3]))
        except:
            # Unparsable number in URL: fall back to the current issue's
            args['issue_number'] = int(cur_issue.split('/')[0])
        args['issue'] = make_issue_number(args['journal_name'],
                                          args['issue_number'],
                                          args['issue_year'])
    if len(arg_list) > 4:
        args['category'] = urllib.unquote(arg_list[4])
    if len(arg_list) > 5:
        try:
            args['recid'] = int(urllib.unquote(arg_list[5]))
        except:
            pass
    args['ln'] = get_journal_preferred_language(args['journal_name'],
                                                args['ln'])
    # FIXME : wash arguments?
    return args
def make_journal_url(current_uri, custom_parameters=None):
    """
    Build a journal URL based on the current URI, overriding the
    parameters parsed from it with the given custom_parameters.

    Parameters:
        current_uri       - *str* the current full URI
        custom_parameters - *dict* parameters that should override
                            those of current_uri
    """
    params = parse_url_string(current_uri)
    for key, value in (custom_parameters or {}).iteritems():
        # Custom params take precedence over the parsed default ones
        params[key] = str(value)
    uri = CFG_SITE_URL + '/journal/'
    if params['journal_name']:
        uri += urllib.quote(params['journal_name']) + '/'
    if params['issue_year'] and params['issue_number']:
        uri += make_issue_number(params['journal_name'],
                                 params['issue_number'],
                                 params['issue_year'],
                                 for_url_p=True) + '/'
    if params['category']:
        uri += urllib.quote(params['category'])
    if params['recid'] and params['recid'] != -1:
        uri += '/' + str(params['recid'])
    # Append the query string: '?' before the first argument,
    # '&amp;' (HTML-escaped ampersand) between subsequent ones
    query_args = []
    if params['ln']:
        query_args.append('ln=' + params['ln'])
    if params['verbose'] != 0:
        query_args.append('verbose=' + str(params['verbose']))
    if query_args:
        uri += '?' + '&amp;'.join(query_args)
    return uri
############################ HTML CACHING FUNCTIONS ############################
def cache_index_page(html, journal_name, category, issue, ln):
"""
Caches the index page main area of a Bulletin
(right hand menu cannot be cached)
@return: tuple (path to cache file (or None), message)
"""
issue = issue.replace("/", "_")
issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/index_%s_%s.html' % \
(CFG_CACHEDIR, journal_name,
year, issue_number, category,
ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop caching
return (None, 'Trying to cache at wrong location: %s' % cache_path)
cache_path_dir = os.path.dirname(cache_path)
try:
if not os.path.isdir(cache_path_dir):
os.makedirs(cache_path_dir)
cached_file = open(cache_path, "w")
cached_file.write(html)
cached_file.close()
except Exception, e:
register_exception(req=None,
prefix="Could not store index page cache",
alert_admin=True)
return (None, e)
return (cache_path, '')
def get_index_page_from_cache(journal_name, category, issue, ln):
    """
    Return the cached index page for the given issue, category and
    language, or False if it is not in the cache (or unreadable).
    """
    issue_number, year = issue.replace("/", "_").split("_", 1)
    category = category.replace(" ", "")
    cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/index_%s_%s.html' % \
                                 (CFG_CACHEDIR, journal_name,
                                  year, issue_number, category, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Refuse to read outside of the webjournal cache directory
        return False
    try:
        return open(cache_path).read()
    except:
        return False
def cache_article_page(html, journal_name, category, recid, issue, ln):
"""
Caches an article view of a journal.
If cache cannot be written, a warning is reported to the admin.
@return: tuple (path to cache file (or None), message)
"""
issue = issue.replace("/", "_")
issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/article_%s_%s_%s.html' % \
(CFG_CACHEDIR, journal_name,
year, issue_number, category, recid, ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop caching
return (None, 'Trying to cache at wrong location: %s' % cache_path)
cache_path_dir = os.path.dirname(cache_path)
try:
if not os.path.isdir(cache_path_dir):
os.makedirs(cache_path_dir)
cached_file = open(cache_path, "w")
cached_file.write(html)
cached_file.close()
except Exception, e:
register_exception(req=None,
prefix="Could not store article cache",
alert_admin=True)
return (None, e)
return (cache_path_dir, '')
# Matches an '<!-- START_NOT_FOR_ALERT --> ... <!-- END_NOT_FOR_ALERT -->'
# section of a cached article page, so that it can be substituted when
# the page is reused (see get_article_page_from_cache()).
NOT_FOR_ALERT_COMMENTS_RE = re.compile('<!--\s*START_NOT_FOR_ALERT\s*-->.*?<!--\s*END_NOT_FOR_ALERT\s*-->', re.IGNORECASE | re.DOTALL)
def get_article_page_from_cache(journal_name, category, recid, issue, ln, bfo=None):
    """
    Return the cached article view of a journal, or False if it is not
    in the cache.  On the CERN site, when a BibFormatObject is given,
    the 'NOT_FOR_ALERT' section of the cached page is replaced by a
    freshly formatted CERN toolbar.
    """
    issue_number, year = issue.replace("/", "_").split("_", 1)
    category = category.replace(" ", "")
    cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/article_%s_%s_%s.html' % \
                                 (CFG_CACHEDIR, journal_name,
                                  year, issue_number, category, recid, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Refuse to read outside of the webjournal cache directory
        return False
    try:
        cached_file = open(cache_path).read()
    except:
        return False
    if CFG_CERN_SITE and bfo:
        try:
            from invenio.modules.formatter.format_elements import bfe_webjournal_cern_toolbar
            cached_file = NOT_FOR_ALERT_COMMENTS_RE.sub(bfe_webjournal_cern_toolbar.format_element(bfo), cached_file, 1)
        except ImportError:
            # Toolbar element unavailable: serve the page as cached
            pass
    return cached_file
def clear_cache_for_article(journal_name, category, recid, issue):
    """
    Resets the cache for an article (e.g. after an article has been
    modified): removes the cached article pages, the cached index
    pages of its category, and the issue-wide articles cache file.

    @return: False when the cache path cannot be validated, True
             otherwise (missing cache files are silently ignored)
    """
    issue = issue.replace("/", "_")
    issue_number, year = issue.split("_", 1)
    category = category.replace(" ", "")
    cache_path = os.path.abspath('%s/webjournal/%s/' %
                                  (CFG_CACHEDIR, journal_name))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop deleting cache
        return False
    issue_dir = '%s/webjournal/%s/%s/%s' % (CFG_CACHEDIR, journal_name,
                                            year, issue_number)
    # Remove the cached article page and the category index page for
    # each cached language (previously four copy-pasted try blocks;
    # 'en' and 'fr' are the languages the old code cleared)
    for ln in ('en', 'fr'):
        for filename in ('article_%s_%s_%s.html' % (category, recid, ln),
                         'index_%s_%s.html' % (category, ln)):
            try:
                os.remove('%s/%s' % (issue_dir, filename))
            except:
                # File was not cached: nothing to clear
                pass
    # Also remove the issue-wide articles cache file
    try:
        os.remove(get_journal_article_cache_path(journal_name, issue))
    except:
        pass
    return True
def clear_cache_for_issue(journal_name, issue):
    """
    Clears the cache of a whole issue: removes every file of the
    issue's cache directory.

    @return: True on success (including when the issue was never
             cached at all), False when the path is invalid or a file
             could not be removed
    """
    issue_number, year = issue.replace("/", "_").split("_", 1)
    cache_path_dir = os.path.abspath('%s/webjournal/%s/%s/%s/' % \
                                      (CFG_CACHEDIR, journal_name,
                                       year, issue_number))
    if not cache_path_dir.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop deleting cache
        return False
    if not os.path.isdir(cache_path_dir):
        # Nothing was ever cached for this issue: nothing to clear.
        # (The previous code let os.listdir() raise OSError here.)
        return True
    for cached_file in os.listdir(cache_path_dir):
        try:
            os.remove(os.path.join(cache_path_dir, cached_file))
        except:
            return False
    return True
######################### CERN SPECIFIC FUNCTIONS #################
def get_recid_from_legacy_number(issue_number, category, number):
    """
    Returns the recid based on the issue number, category and
    'number'.
    This is used to support URLs using the now deprecated 'number'
    argument. The function tries to reproduce the behaviour of the
    old way of doing, even keeping some of its 'problems' (so that we
    reach the same article as before with a given number)..
    Returns the recid as int, or -1 if not found
    """
    recids = []
    if issue_number[0] == "0":
        # Also search without the leading zero of the issue number, so
        # that records tagged either way are matched
        alternative_issue_number = issue_number[1:]
        recids = list(search_pattern(p='65017a:"%s" and 773__n:%s' %
                                     (category, issue_number)))
        recids.extend(list(search_pattern(p='65017a:"%s" and 773__n:%s' %
                                          (category, alternative_issue_number))))
    else:
        # NOTE(review): this branch queries '65017' without the 'a'
        # subfield used above -- presumably one of the legacy
        # 'problems' deliberately kept (see docstring); confirm before
        # changing.
        recids = list(search_pattern(p='65017:"%s" and 773__n:%s' %
                                     (category, issue_number)))
    # Now must order the records and pick the one at index 'number'.
    # But we have to take into account that there can be multiple
    # records at position 1, and that these additional records should
    # be numbered with negative numbers:
    # 1, 1, 1, 2, 3 -> 1, -1, -2, 2, 3...
    negative_index_records = {}
    positive_index_records = {}
    # Fill in 'negative_index_records' and 'positive_index_records'
    # lists with the following loop
    for recid in recids:
        bfo = BibFormatObject(recid)
        # 773__c holds the position of the article within the issue
        order = [subfield['c'] for subfield in bfo.fields('773__') if \
                 issue_number in subfield.get('n', '')]
        if len(order) > 0:
            # If several orders are defined for the same article and
            # the same issue, keep the first one
            order = order[0]
            if order.isdigit():
                # Order must be an int. Otherwise skip
                order = int(order)
                if order == 1 and positive_index_records.has_key(1):
                    # This is then a negative number for this record
                    index = (len(negative_index_records.keys()) > 0 and \
                             min(negative_index_records.keys()) -1) or 0
                    negative_index_records[index] = recid
                else:
                    # Positive number for this record
                    if not positive_index_records.has_key(order):
                        positive_index_records[order] = recid
                    else:
                        # We make the assumption that we cannot have
                        # twice the same position for two
                        # articles. Previous WebJournal module was not
                        # clear about that. Just drop this record
                        # (better than crashing or looping forever..)
                        pass
    recid_to_return = -1
    # Ok, we can finally pick the recid corresponding to 'number'
    if number <= 0:
        # Zero/negative 'number': pick among the extra records that
        # were grouped at position 1, newest negative index first
        negative_indexes = negative_index_records.keys()
        negative_indexes.sort()
        negative_indexes.reverse()
        if len(negative_indexes) > abs(number):
            recid_to_return = negative_index_records[negative_indexes[abs(number)]]
    else:
        if positive_index_records.has_key(number):
            recid_to_return = positive_index_records[number]
    return recid_to_return
def is_recid_in_released_issue(recid):
    """
    Returns True if recid is part of the latest issue of the given
    journal.
    WARNING: the function does not check that the article does not
    belong to the draft collection of the record. This is wanted, in
    order to workaround the time needed for a record to go from the
    draft collection to the final collection
    """
    bfo = BibFormatObject(recid)
    journal_name = ''
    # Journal title(s) of the record, from MARC 773__t
    journal_names = [journal_name for journal_name in bfo.fields('773__t') if journal_name]
    if journal_names:
        journal_name = journal_names[0]
    else:
        # Record carries no journal title: cannot belong to any issue
        return False
    existing_journal_names = [o['journal_name'] for o in get_journals_ids_and_names()]
    if not journal_name in existing_journal_names:
        # Try to remove whitespace
        journal_name = journal_name.replace(' ', '')
        if not journal_name in existing_journal_names:
            # Journal name unknown from WebJournal
            return False
    config_strings = get_xml_from_config(["draft_image_access_policy"], journal_name)
    if config_strings['draft_image_access_policy'] and \
           config_strings['draft_image_access_policy'][0] != 'allow':
        # The journal does not want to optimize access to images
        return False
    # Issue number(s) of the record, from MARC 773__n
    article_issues = bfo.fields('773__n')
    current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
    for article_issue in article_issues:
        # Check each issue until a released one is found
        if get_release_datetime(article_issue, journal_name):
            # Release date exists, issue has been released
            return True
        else:
            # Unreleased issue. Do we still allow based on journal config?
            unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
            if (unreleased_issues_mode == 'none' or \
                (unreleased_issues_mode == 'future' and \
                 not issue_is_later_than(article_issue, current_issue))):
                return True
    return False
diff --git a/invenio/legacy/webjournal/web/admin/webjournaladmin.py b/invenio/legacy/webjournal/web/admin/webjournaladmin.py
index eb69f0556..abfe530cc 100644
--- a/invenio/legacy/webjournal/web/admin/webjournaladmin.py
+++ b/invenio/legacy/webjournal/web/admin/webjournaladmin.py
@@ -1,389 +1,389 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Invenio WebJournal Administrator Interface."""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
import invenio.webjournaladminlib as wjn
from invenio.access_control_engine import acc_authorize_action
from invenio.webpage import page, error_page
from invenio.config import CFG_SITE_URL, CFG_SITE_LANG, CFG_SITE_NAME
from invenio.webuser import getUid, page_not_authorized
from invenio.base.i18n import wash_language, gettext_set_language
from invenio.utils.url import wash_url_argument
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webjournal_config import \
InvenioWebJournalNoJournalOnServerError, \
InvenioWebJournalNoNameError, \
InvenioWebJournalNoCurrentIssueError, \
InvenioWebJournalIssueNumberBadlyFormedError, \
InvenioWebJournalJournalIdNotFoundDBError
from invenio.webjournal_washer import \
wash_journal_name, \
wash_issue_number
def index(req, ln=CFG_SITE_LANG, journal_name=None, action=""):
    """
    Main administration page.

    Lists the journals, and offers options to edit them, delete them
    or add new journals.

    @param req: the request object
    @param ln: interface language
    @param journal_name: name of the journal to act upon (None lists all)
    @param action: admin action to perform (e.g. 'delete', 'askDelete')
    """
    navtrail_previous_links = wjn.getnavtrail()
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        # NOTE(review): bare except follows the file-wide convention of
        # showing a generic error page when the uid lookup fails.
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
        action = wash_url_argument(action, 'str')
    except InvenioWebJournalNoJournalOnServerError, e:
        # Ok, no journal. Let the admin add one...
        pass
    except InvenioWebJournalNoNameError, e:
        register_exception(req=req)
        return e.user_box()
    if action in ['delete', 'askDelete']:
        # To perform these, one must be authorized with editor rights
        auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                    name=journal_name, with_editor_rights='yes')
    else:
        auth = acc_authorize_action(getUid(req), 'cfgwebjournal')
    if auth[0] == 0:
        return page(title=_('WebJournal Admin'),
                    body=wjn.perform_index(ln=ln,
                                           journal_name=journal_name,
                                           action=action,
                                           uid=getUid(req)),
                    uid=uid,
                    language=ln,
                    req=req,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
def administrate(req, journal_name, ln=CFG_SITE_LANG):
    """
    Shows the settings of a journal.

    @param req: the request object
    @param journal_name: name of the journal to administrate
    @param ln: interface language
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a>' % CFG_SITE_URL)
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalNoNameError, e:
        register_exception(req=req)
        return e.user_box()
    auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                name="%s" % journal_name)
    if auth[0] == 0:
        # Check separately whether the user also has editor rights: the
        # administration page shows additional options in that case.
        as_editor = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                         name="%s" % journal_name,
                                         with_editor_rights='yes')[0] == 0
        return page(title=_('Administrate %(journal_name)s' % {'journal_name':journal_name}),
                    body=wjn.perform_administrate(ln=ln, journal_name=journal_name,
                                                  as_editor=as_editor),
                    uid=uid,
                    language=ln,
                    req=req,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
def feature_record(req, journal_name="", recid="", img_url="", ln=CFG_SITE_LANG, action=""):
    """
    Interface to feature a record. Will be saved in a flat file.

    Requires editor rights on the journal.

    @param req: the request object
    @param journal_name: name of the journal
    @param recid: id of the record to feature
    @param img_url: URL of an image, forwarded to perform_feature_record
    @param ln: interface language
    @param action: action string, forwarded to perform_feature_record
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalNoNameError, e:
        register_exception(req=req)
        return e.user_box()
    auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                name="%s" % journal_name,
                                with_editor_rights='yes')
    if auth[0] == 0:
        return page(title=_("Feature a record"),
                    body=wjn.perform_feature_record(ln=ln,
                                                    journal_name=journal_name,
                                                    recid=recid,
                                                    img_url=img_url,
                                                    action=action),
                    uid=uid,
                    language=ln,
                    req=req,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
def alert(req, journal_name="", ln=CFG_SITE_LANG, sent="False", plainText=u"",
          htmlMail="", recipients="", subject="", issue="", force="False"):
    """
    Sends an email alert, in HTML/PlainText or only PlainText to a mailing
    list to alert for new journal releases.

    All request parameters are washed to strings and forwarded to
    wjn.perform_request_alert; requires editor rights on the journal.

    @param req: the request object
    @param journal_name: name of the journal the alert is about
    @param ln: interface language
    @param sent: 'True'/'False' flag (string), forwarded as 'sent'
    @param plainText: plain-text content, forwarded as 'plain_text'
    @param htmlMail: HTML content flag/value, forwarded as 'html_mail'
    @param recipients: recipients value, forwarded as-is
    @param subject: subject value, forwarded as-is
    @param issue: issue number, validated by wash_issue_number
    @param force: 'True'/'False' flag (string), forwarded as 'force'
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
        issue = wash_issue_number(ln,
                                  journal_name,
                                  issue)
        plain_text = wash_url_argument(plainText, 'str')
        html_mail = wash_url_argument(htmlMail, 'str')
        recipients = wash_url_argument(recipients, 'str')
        subject = wash_url_argument(subject, 'str')
        sent = wash_url_argument(sent, 'str')
        force = wash_url_argument(force, 'str')
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalNoNameError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalNoCurrentIssueError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalIssueNumberBadlyFormedError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalJournalIdNotFoundDBError, e:
        register_exception(req=req)
        return e.user_box()
    auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                name="%s" % journal_name,
                                with_editor_rights='yes')
    if auth[0] == 0:
        return page(title=_("Email Alert System"),
                    body=wjn.perform_request_alert(journal_name=journal_name,
                                                   issue=issue,
                                                   ln=ln,
                                                   sent=sent,
                                                   plain_text=plain_text,
                                                   subject=subject,
                                                   recipients=recipients,
                                                   html_mail=html_mail,
                                                   force=force),
                    uid=uid,
                    language=ln,
                    req=req,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
def regenerate(req, journal_name="", issue="", ln=CFG_SITE_LANG,
               confirmed_p="", publish_draft_articles_p=""):
    """
    Clears the cache for the given issue.

    @param req: the request object
    @param journal_name: name of the journal
    @param issue: issue to regenerate (validated by wash_issue_number)
    @param ln: interface language
    @param confirmed_p: set to 'confirmed' to actually perform the action
    @param publish_draft_articles_p: set to 'move' to also publish drafts
    """
    navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name)
        # NOTE(review): the washed value ('issue_number') is not passed
        # below -- the raw 'issue' is. Washing serves only as validation
        # here; confirm this is intentional.
        issue_number = wash_issue_number(ln, journal_name,
                                         issue)
        confirmed_p = wash_url_argument(confirmed_p, 'str') == "confirmed"
        publish_draft_articles_p = wash_url_argument(publish_draft_articles_p, 'str') == "move"
    except InvenioWebJournalNoJournalOnServerError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalNoNameError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalNoCurrentIssueError, e:
        register_exception(req=req)
        return e.user_box()
    except InvenioWebJournalIssueNumberBadlyFormedError, e:
        register_exception(req=req)
        return e.user_box()
    auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                name="%s" % journal_name)
    if auth[0] == 0:
        return page(title=confirmed_p and _("Issue regenerated") or _("Regenerate Issue"),
                    body=wjn.perform_regenerate_issue(ln=ln,
                                                      journal_name=journal_name,
                                                      issue=issue,
                                                      confirmed_p=confirmed_p,
                                                      publish_draft_articles_p=publish_draft_articles_p),
                    uid=uid,
                    language=ln,
                    req=req,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
def issue_control(req, journal_name="", issue=[],
ln=CFG_SITE_LANG, action="cfg"):
"""
Page that allows full control over creating, backtracing, adding to,
removing from issues.
"""
navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
ln = wash_language(ln)
_ = gettext_set_language(ln)
try:
uid = getUid(req)
except:
return error_page('Error', req)
try:
journal_name = wash_journal_name(ln, journal_name)
action = wash_url_argument(action, 'str')
issue = wash_url_argument(issue, 'list')
issues = [wash_issue_number(ln,journal_name, _issue) \
for _issue in issue \
if _issue != "ww/YYYY"]
except InvenioWebJournalNoJournalOnServerError, e:
register_exception(req=req)
return e.user_box()
except InvenioWebJournalNoNameError, e:
register_exception(req=req)
return e.user_box()
except InvenioWebJournalNoCurrentIssueError, e:
register_exception(req=req)
return e.user_box()
except InvenioWebJournalIssueNumberBadlyFormedError, e:
register_exception(req=req)
return e.user_box()
auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
name="%s" % journal_name,
with_editor_rights='yes')
if auth[0] == 0:
return page(title=_("Publishing Interface"),
body=wjn.perform_request_issue_control(journal_name=journal_name,
issues=issues,
ln=ln,
action=action),
uid=uid,
language=ln,
req=req,
navtrail = navtrail_previous_links,
lastupdated=__lastupdated__)
else:
return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
def configure(req, journal_name=None, ln=CFG_SITE_LANG, xml_config=u'', action='edit'):
    """
    Let admins configure the journal settings.

    @param req: the request object
    @param journal_name: journal to configure (None when adding a new one)
    @param ln: interface language
    @param xml_config: XML configuration, forwarded to
                       perform_request_configure
    @param action: 'add'/'addDone' to create a journal, otherwise edit
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    # Navtrail depends on whether we act on an existing journal.
    if journal_name is None:
        navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a>' % CFG_SITE_URL)
    else:
        navtrail_previous_links = wjn.getnavtrail(' &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py">WebJournal Admin</a> &gt; <a class="navtrail" href="%s/admin/webjournal/webjournaladmin.py/administrate?journal_name=%s">%s</a>' % (CFG_SITE_URL, CFG_SITE_URL, journal_name, journal_name))
    if action in ['add', 'addDone']:
        page_title = _('Add Journal')
    else:
        page_title = _("Edit Settings")
    try:
        uid = getUid(req)
    except:
        return error_page('Error', req)
    try:
        journal_name = wash_journal_name(ln, journal_name, guess=False)
        xml_config = wash_url_argument(xml_config, 'str')
        action = wash_url_argument(action, 'str')
    except InvenioWebJournalNoJournalOnServerError, e:
        # Ok, no journal. Let the admin add one...
        pass
    except InvenioWebJournalNoNameError, e:
        register_exception(req=req)
        return e.user_box()
    auth = acc_authorize_action(getUid(req), 'cfgwebjournal',
                                name="%s" % journal_name,
                                with_editor_rights='yes')
    if auth[0] == 0:
        return page(title=page_title,
                    body=wjn.perform_request_configure(journal_name=journal_name,
                                                       ln=ln,
                                                       xml_config=xml_config,
                                                       action=action),
                    uid=uid,
                    language=ln,
                    req=req,
                    navtrail = navtrail_previous_links,
                    lastupdated=__lastupdated__)
    else:
        return page_not_authorized(req=req, text=auth[1], navtrail=navtrail_previous_links)
diff --git a/invenio/legacy/webjournal/webinterface.py b/invenio/legacy/webjournal/webinterface.py
index 6dcb3425f..04f630441 100644
--- a/invenio/legacy/webjournal/webinterface.py
+++ b/invenio/legacy/webjournal/webinterface.py
@@ -1,552 +1,552 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebJournal Web Interface."""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
import urllib
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.access_control_engine import acc_authorize_action
from invenio.config import \
CFG_SITE_URL, \
CFG_SITE_SECURE_URL, \
CFG_SITE_LANG, \
CFG_CERN_SITE
from invenio.webuser import getUid
from invenio.utils.url import redirect_to_url
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webjournal_config import \
InvenioWebJournalNoJournalOnServerError, \
InvenioWebJournalNoNameError, \
InvenioWebJournalNoCurrentIssueError, \
InvenioWebJournalIssueNumberBadlyFormedError, \
InvenioWebJournalArchiveDateWronglyFormedError, \
InvenioWebJournalJournalIdNotFoundDBError, \
InvenioWebJournalNoArticleNumberError, \
InvenioWebJournalNoPopupRecordError, \
InvenioWebJournalIssueNotFoundDBError, \
InvenioWebJournalNoCategoryError
from invenio.webjournal_utils import \
get_current_issue, \
get_recid_from_legacy_number, \
get_journal_categories
from invenio.webjournal_washer import \
wash_category, \
wash_issue_number, \
wash_journal_name, \
wash_journal_language, \
wash_article_number, \
wash_popup_record, \
wash_archive_date
from invenio.webjournal import \
perform_request_index, \
perform_request_article, \
perform_request_contact, \
perform_request_popup, \
perform_request_search
from invenio.webstat import register_customevent
import invenio.template
webjournal_templates = invenio.template.load('webjournal')
class WebInterfaceJournalPages(WebInterfaceDirectory):
    """Defines the set of /journal pages."""

    # Per-request URL components; filled in by __call__ when parsing
    # the request path.
    journal_name = None
    journal_issue_year = None
    journal_issue_number = None
    category = None
    article_id = None
    _exports = ['popup', 'search', 'contact']

    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs """
        # On CERN sites, these components are still served by the
        # legacy interface; everything else is resolved by this class.
        if component in ['article', 'issue_control', 'edit_article', 'alert',
                         'feature_record', 'regenerate', 'administrate'] and \
               CFG_CERN_SITE:
            return WebInterfaceJournalPagesLegacy(), [component]
        return self, []
    def __call__(self, req, form):
        """ Maybe resolve the final / of a directory """
        # Expected path layout:
        #   /journal/<name>[/<year>/<number>[/<category>[/<article_id>]]]
        # or /journal/<name>/<specific category>
        path = req.uri[1:].split('/')
        journal_name = None
        journal_issue_year = None
        journal_issue_number = None
        specific_category = None
        category = None
        article_id = None
        if len(path) > 1:
            journal_name = path[1]
        if len(path) > 2 and path[2].isdigit():
            journal_issue_year = path[2]
        elif len(path) > 2 and not path[2].isdigit():
            # Non-numeric second component: a category name, URL-encoded.
            specific_category = urllib.unquote(path[2])
        if len(path) > 3 and path[3].isdigit():
            journal_issue_number = path[3]
        if len(path) > 4:
            category = urllib.unquote(path[4])
        if len(path) > 5 and path[5].isdigit():
            article_id = int(path[5])
        ## Support for legacy journal/[empty]?(args*) urls. There are
        ## these parameters only in that case
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'issue': (str, ""),
                                   'category': (str, ""),
                                   'ln': (str, CFG_SITE_LANG),
                                   'number': (int, None),
                                   'verbose': (int, 0)}
                            )
        if 'name' in form.keys() or \
           'issue' in form.keys() or \
           'category' in form.keys():
            # Legacy query-argument URL: wash the arguments and redirect
            # to the equivalent path-based URL.
            ln = wash_journal_language(argd['ln'])
            try:
                journal_name = wash_journal_name(ln, argd['name'])
            except InvenioWebJournalNoJournalOnServerError, e:
                register_exception(req=req)
                return e.user_box(req)
            except InvenioWebJournalNoNameError, e:
                return e.user_box(req)
            try:
                issue = wash_issue_number(ln, journal_name,
                                          argd['issue'])
                issue_year = issue.split('/')[1]
                issue_number = issue.split('/')[0]
            except InvenioWebJournalIssueNumberBadlyFormedError, e:
                register_exception(req=req)
                return e.user_box(req)
            except InvenioWebJournalJournalIdNotFoundDBError, e:
                register_exception(req=req)
                return e.user_box(req)
            category = wash_category(ln, argd['category'], journal_name, issue).replace(' ', '%20')
            redirect_to_url(req, CFG_SITE_URL + '/journal/%(name)s/%(issue_year)s/%(issue_number)s/%(category)s/?ln=%(ln)s' % \
                            {'name': journal_name,
                             'issue_year': issue_year,
                             'issue_number': issue_number,
                             'category': category,
                             'ln': ln})
        ## End support for legacy urls
        # Check that given journal name exists and that it is written
        # with correct casing.
        redirect_p = False
        try:
            washed_journal_name = wash_journal_name(argd['ln'], journal_name)
            if washed_journal_name != journal_name:
                redirect_p = True
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        # If some parameters are missing, deduce them and
        # redirect
        if not journal_issue_year or \
           not journal_issue_number or \
           not category or \
           redirect_p or \
           specific_category:
            if not journal_issue_year or not journal_issue_number:
                # Default to the journal's current issue.
                journal_issue = get_current_issue(argd['ln'], washed_journal_name)
                journal_issue_year = journal_issue.split('/')[1]
                journal_issue_number = journal_issue.split('/')[0]
            if not category or specific_category:
                categories = get_journal_categories(washed_journal_name,
                                                    journal_issue_number + \
                                                    '/' + journal_issue_year)
                if not categories:
                    # Mmh, it seems that this issue has no
                    # category. Ok get all of them regardless of the
                    # issue
                    categories = get_journal_categories(washed_journal_name)
                    if not categories:
                        # Mmh we really have no category!
                        try:
                            raise InvenioWebJournalIssueNotFoundDBError(argd['ln'],
                                                                        journal_name,
                                                                        '')
                        except InvenioWebJournalIssueNotFoundDBError, e:
                            register_exception(req=req)
                            return e.user_box(req)
                if not category:
                    category = categories[0].replace(' ', '%20')
                if specific_category:
                    category = specific_category.replace(' ', '%20')
            redirect_to_url(req, CFG_SITE_URL + '/journal/%(name)s/%(issue_year)s/%(issue_number)s/%(category)s/?ln=%(ln)s' % \
                            {'name': washed_journal_name,
                             'issue_year': journal_issue_year,
                             'issue_number': journal_issue_number,
                             'category': category,
                             'ln': argd['ln']})
        journal_issue = ""
        if journal_issue_year is not None and \
           journal_issue_number is not None:
            journal_issue = journal_issue_number + '/' + \
                            journal_issue_year
        # Final washing of all URL components before rendering.
        try:
            journal_name = washed_journal_name
            issue = wash_issue_number(argd['ln'], journal_name, journal_issue)
            category = wash_category(argd['ln'], category, journal_name, issue)
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        except InvenioWebJournalIssueNumberBadlyFormedError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoCategoryError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=req)
            return e.user_box(req)
        editor = False
        if acc_authorize_action(getUid(req), 'cfgwebjournal',
                                name="%s" % journal_name)[0] == 0:
            editor = True
        if article_id is None:
            # No article selected: render the issue index page.
            html = perform_request_index(req,
                                         journal_name,
                                         journal_issue,
                                         argd['ln'],
                                         category,
                                         editor,
                                         verbose=argd['verbose'])
        else:
            html = perform_request_article(req,
                                           journal_name,
                                           journal_issue,
                                           argd['ln'],
                                           category,
                                           article_id,
                                           editor,
                                           verbose=argd['verbose'])
        # register event in webstat
        try:
            register_customevent("journals", ["display", journal_name, journal_issue, category, argd['ln'], article_id])
        except:
            # Best-effort statistics: never fail the page for webstat.
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
        return html
    def contact(self, req, form):
        """
        Display contact information for the journal.

        @param req: the request object
        @param form: submitted form values ('name', 'ln', 'verbose')
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'ln': (str, ""),
                                   'verbose': (int, 0)
                                   })
        try:
            ln = wash_journal_language(argd['ln'])
            washed_journal_name = wash_journal_name(ln, argd['name'])
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        html = perform_request_contact(req, ln, washed_journal_name,
                                       verbose=argd['verbose'])
        return html
    def popup(self, req, form):
        """
        simple pass-through function that serves as a checker for popups.

        @param req: the request object
        @param form: submitted form values ('name', 'record', 'ln')
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'record': (str, ""),
                                   'ln': (str, "")
                                   })
        try:
            ln = wash_journal_language(argd['ln'])
            washed_journal_name = wash_journal_name(ln, argd['name'])
            record = wash_popup_record(ln, argd['record'], washed_journal_name)
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        except InvenioWebJournalNoPopupRecordError, e:
            register_exception(req=req)
            return e.user_box(req)
        html = perform_request_popup(req, ln, washed_journal_name, record)
        return html
    def search(self, req, form):
        """
        Display search interface.

        @param req: the request object
        @param form: submitted form values (journal name, archive
                     issue/date selectors, language, verbosity)
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'issue': (str, ""),
                                   'archive_year': (str, ""),
                                   'archive_issue': (str, ""),
                                   'archive_select': (str, "False"),
                                   'archive_date': (str, ""),
                                   'archive_search': (str, "False"),
                                   'ln': (str, CFG_SITE_LANG),
                                   'verbose': (int, 0)})
        try:
            # FIXME: if journal_name is empty, redirect
            ln = wash_journal_language(argd['ln'])
            washed_journal_name = wash_journal_name(ln, argd['name'])
            archive_issue = wash_issue_number(ln, washed_journal_name,
                                              argd['archive_issue'])
            archive_date = wash_archive_date(ln, washed_journal_name,
                                             argd['archive_date'])
            archive_select = argd['archive_select']
            archive_search = argd['archive_search']
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        except InvenioWebJournalNoCurrentIssueError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalIssueNumberBadlyFormedError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalArchiveDateWronglyFormedError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=req)
            return e.user_box(req)
        html = perform_request_search(req=req,
                                      journal_name=washed_journal_name,
                                      ln=ln,
                                      archive_issue=archive_issue,
                                      archive_select=archive_select,
                                      archive_date=archive_date,
                                      archive_search=archive_search,
                                      verbose=argd['verbose'])
        return html

    # Requests for /journal/ itself are handled by __call__.
    index = __call__
class WebInterfaceJournalPagesLegacy(WebInterfaceDirectory):
    """Defines the set of /journal pages."""

    _exports = ['', 'article', 'issue_control', 'edit_article', 'alert',
                'feature_record', 'regenerate', 'administrate']

    def index(self, req, form):
        """
        Index page.
        Washes all the parameters and stores them in journal_defaults dict
        for subsequent format_elements.
        Passes on to logic function and eventually returns HTML.
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'issue': (str, ""),
                                   'category': (str, ""),
                                   'ln': (str, "")}
                            )
        try:
            ln = wash_journal_language(argd['ln'])
            journal_name = wash_journal_name(ln, argd['name'])
            issue_number = wash_issue_number(ln, journal_name,
                                             argd['issue'])
            category = wash_category(ln, argd['category'], journal_name, issue_number)
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        except InvenioWebJournalNoCurrentIssueError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalIssueNumberBadlyFormedError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoCategoryError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=req)
            return e.user_box(req)
        # the journal_defaults will be used by format elements that have no
        # direct access to the params here, no more checking needed
        req.journal_defaults = {"name": journal_name,
                                "issue": issue_number,
                                "ln": ln,
                                "category": category}
        html = perform_request_index(req, journal_name, issue_number, ln,
                                     category)
        return html
    def article(self, req, form):
        """
        Article page.
        Washes all the parameters and stores them in journal_defaults dict
        for subsequent format_elements.
        Passes on to logic function and eventually returns HTML.

        Resolves the legacy article 'number' to a record id and redirects
        to the corresponding modern URL (or to the issue index when the
        record cannot be found).
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'issue': (str, ""),
                                   'category': (str, ""),
                                   'number': (str, ""),
                                   'ln': (str, ""),
                                   }
                            )
        try:
            ln = wash_journal_language(argd['ln'])
            journal_name = wash_journal_name(ln, argd['name'])
            issue = wash_issue_number(ln, journal_name,
                                      argd['issue'])
            issue_year = issue.split('/')[1]
            issue_number = issue.split('/')[0]
            category = wash_category(ln, argd['category'], journal_name, issue_number)
            number = wash_article_number(ln, argd['number'], journal_name)
            recid = get_recid_from_legacy_number(issue, category, int(number))
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        except InvenioWebJournalNoCurrentIssueError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalIssueNumberBadlyFormedError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoArticleNumberError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoCategoryError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalJournalIdNotFoundDBError, e:
            register_exception(req=req)
            return e.user_box(req)
        if recid != -1:
            # Found a corresponding record
            redirect_to_url(req, CFG_SITE_URL + \
                            '/journal/' + journal_name + '/' + issue_year + \
                            '/' + issue_number + '/' + category + \
                            '/' + str(recid) + '?ln=' + ln)
        else:
            # Corresponding record not found. Display index
            redirect_to_url(req, CFG_SITE_URL + \
                            '/journal/' + journal_name + '/' + issue_year + \
                            '/' + issue_number + '/' + category + \
                            '?ln=' + ln)
    def administrate(self, req, form):
        """Index page.

        Washes the journal name and redirects to the admin interface.
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'ln': (str, "")
                                   })
        try:
            ln = wash_journal_language(argd['ln'])
            journal_name = wash_journal_name(ln, argd['name'])
        except InvenioWebJournalNoJournalOnServerError, e:
            register_exception(req=req)
            return e.user_box(req)
        except InvenioWebJournalNoNameError, e:
            return e.user_box(req)
        redirect_to_url(req, CFG_SITE_SECURE_URL + \
                        '/admin/webjournal/webjournaladmin.py/administrate?journal_name=' + \
                        journal_name + '&ln=' + ln)
    def feature_record(self, req, form):
        """
        Interface to feature a record. Will be saved in a flat file.

        Pass-through: forwards all arguments to the admin interface.
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'recid': (str, "init"),
                                   'url': (str, "init"),
                                   'ln': (str, "")})
        redirect_to_url(req, CFG_SITE_SECURE_URL + \
                        '/admin/webjournal/webjournaladmin.py/feature_record?journal_name=' + \
                        argd['name'] + '&ln=' + argd['ln'] + '&recid='+ argd['recid'] + '&url='+ argd['url'])
    def regenerate(self, req, form):
        """
        Clears the cache for the issue given.

        Pass-through: forwards all arguments to the admin interface.
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'issue': (str, ""),
                                   'ln': (str, "")})
        redirect_to_url(req, CFG_SITE_SECURE_URL + \
                        '/admin/webjournal/webjournaladmin.py/regenerate?journal_name=' + \
                        argd['name'] + '&ln=' + argd['ln'] + '&issue=' + argd['issue'])
    def alert(self, req, form):
        """
        Alert system.
        Sends an email alert, in HTML/PlainText or only PlainText to a mailing
        list to alert for new journal releases.

        Pass-through: forwards all arguments to the admin interface.
        """
        argd = wash_urlargd(form, {'name': (str, ""),
                                   'sent': (str, "False"),
                                   'plainText': (str, ''),
                                   'htmlMail': (str, ""),
                                   'recipients': (str, ""),
                                   'subject': (str, ""),
                                   'ln': (str, ""),
                                   'issue': (str, ""),
                                   'force': (str, "False")})
        redirect_to_url(req, CFG_SITE_SECURE_URL + \
                        '/admin/webjournal/webjournaladmin.py/alert?journal_name=' + \
                        argd['name'] + '&ln=' + argd['ln'] + '&issue=' + argd['issue'] + \
                        '&sent=' + argd['sent'] + '&plainText=' + argd['plainText'] + \
                        '&htmlMail=' + argd['htmlMail'] + '&recipients=' + argd['recipients'] + \
                        '&force=' + argd['force'] + '&subject=' + argd['subject'])
def issue_control(self, req, form):
"""
page that allows full control over creating, backtracing, adding to,
removing from issues.
"""
argd = wash_urlargd(form, {'name': (str, ""),
'add': (str, ""),
'action_publish': (str, "cfg"),
'issue_number': (list, []),
'ln': (str, "")})
redirect_to_url(req, CFG_SITE_SECURE_URL + \
'/admin/webjournal/webjournaladmin.py/issue_control?journal_name=' + \
argd['name'] + '&ln=' + argd['ln'] + '&issue=' + argd['issue_number'] + \
'&action=' + argd['action_publish'])
diff --git a/invenio/legacy/webmessage/api.py b/invenio/legacy/webmessage/api.py
index a75cb74ec..af15da2d1 100644
--- a/invenio/legacy/webmessage/api.py
+++ b/invenio/legacy/webmessage/api.py
@@ -1,497 +1,497 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" WebMessage module, messaging system"""
__revision__ = "$Id$"
import invenio.webmessage_dblayer as db
from invenio.modules.messages.config import \
CFG_WEBMESSAGE_STATUS_CODE, \
CFG_WEBMESSAGE_RESULTS_FIELD, \
CFG_WEBMESSAGE_SEPARATOR, \
CFG_WEBMESSAGE_ROLES_WITHOUT_QUOTA, \
CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE
from invenio.modules.messages.errors import InvenioWebMessageError
from invenio.config import CFG_SITE_LANG
from invenio.base.i18n import gettext_set_language
from invenio.utils.date import datetext_default, get_datetext
from invenio.htmlutils import escape_html
from invenio.webuser import collect_user_info, list_users_in_roles
from invenio.access_control_admin import acc_get_role_id, acc_is_user_in_role
try:
import invenio.template
webmessage_templates = invenio.template.load('webmessage')
except:
pass
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
def perform_request_display_msg(uid, msgid, ln=CFG_SITE_LANG):
    """
    Displays a specific message.

    Marks a NEW message as READ on first display.

    @param uid: user id
    @param msgid: message id
    @param ln: language
    @return: body
    """
    _ = gettext_set_language(ln)
    body = ""
    if (db.check_user_owns_message(uid, msgid) == 0):
        # The user doesn't own this message
        try:
            raise InvenioWebMessageError(_('Sorry, this message in not in your mailbox.'))
        except InvenioWebMessageError, exc:
            register_exception()
            body = webmessage_templates.tmpl_error(exc.message, ln)
            return body
    else:
        (msg_id,
         msg_from_id, msg_from_nickname,
         msg_sent_to, msg_sent_to_group,
         msg_subject, msg_body,
         msg_sent_date, msg_received_date,
         msg_status) = db.get_message(uid, msgid)
        if (msg_id == ""):
            # The message exists in table user_msgMESSAGE
            # but not in table msgMESSAGE => table inconsistency
            try:
                raise InvenioWebMessageError(_('This message does not exist.'))
            except InvenioWebMessageError, exc:
                register_exception()
                body = webmessage_templates.tmpl_error(exc.message, ln)
                return body
        else:
            # First display of a NEW message: flag it as READ.
            if (msg_status == CFG_WEBMESSAGE_STATUS_CODE['NEW']):
                db.set_message_status(uid, msgid,
                                      CFG_WEBMESSAGE_STATUS_CODE['READ'])
            body = webmessage_templates.tmpl_display_msg(
                msg_id,
                msg_from_id,
                msg_from_nickname,
                msg_sent_to,
                msg_sent_to_group,
                msg_subject,
                msg_body,
                msg_sent_date,
                msg_received_date,
                ln)
    return body
def perform_request_display(uid, warnings=None, infos=None, ln=CFG_SITE_LANG):
    """
    Displays the user's Inbox.

    @param uid: user id
    @param warnings: list of warning messages to display (default: none)
    @param infos: list of info messages to display (default: none)
    @param ln: language
    @return: body with warnings
    """
    # BUGFIX: use None sentinels instead of mutable [] default arguments,
    # which are shared between calls and could accumulate messages.
    if warnings is None:
        warnings = []
    if infos is None:
        infos = []
    rows = db.get_all_messages_for_user(uid)
    nb_messages = db.count_nb_messages(uid)
    # Members of these roles have no mailbox quota.
    no_quota_users = list_users_in_roles(CFG_WEBMESSAGE_ROLES_WITHOUT_QUOTA)
    no_quota = uid in no_quota_users
    body = webmessage_templates.tmpl_display_inbox(messages=rows,
                                                   infos=infos,
                                                   warnings=warnings,
                                                   nb_messages=nb_messages,
                                                   no_quota=no_quota,
                                                   ln=ln)
    return body
def perform_request_delete_msg(uid, msgid, ln=CFG_SITE_LANG):
    """
    Delete a given message from user inbox
    @param uid: user id (int)
    @param msgid: message id (int)
    @param ln: language
    @return: body with warnings
    """
    _ = gettext_set_language(ln)
    warnings = []
    infos = []
    body = ""
    if (db.check_user_owns_message(uid, msgid) == 0):
        # The user doesn't own this message
        try:
            raise InvenioWebMessageError(_('Sorry, this message in not in your mailbox.'))
        except InvenioWebMessageError, exc:
            register_exception()
            body = webmessage_templates.tmpl_error(exc.message, ln)
            return body
    else:
        if (db.delete_message_from_user_inbox(uid, msgid) == 0):
            warnings.append(_("The message could not be deleted."))
        else:
            infos.append(_("The message was successfully deleted."))
    # Re-render the inbox with the outcome of the deletion.
    return perform_request_display(uid, warnings, infos, ln)
def perform_request_delete_all(uid, confirmed=False, ln=CFG_SITE_LANG):
    """
    Empty a user's mailbox, asking for confirmation first.

    @param uid: user id (int)
    @param confirmed: when false, return a confirmation page instead of deleting
    @param ln: language
    @return: HTML body
    """
    _ = gettext_set_language(ln)
    if not confirmed:
        # First pass: ask the user to confirm the irreversible deletion.
        return webmessage_templates.tmpl_confirm_delete(ln)
    db.delete_all_messages(uid)
    success_notes = [_("Your mailbox has been emptied.")]
    return perform_request_display(uid, [], success_notes, ln)
def perform_request_write(uid,
                          msg_reply_id="",
                          msg_to="",
                          msg_to_group="",
                          msg_subject="",
                          msg_body="",
                          ln=CFG_SITE_LANG):
    """
    Display a write a message page.
    @param uid: user id.
    @type uid: int
    @param msg_reply_id: if this message is a reply to another, other's ID.
    @type msg_reply_id: int
    @param msg_to: comma separated usernames.
    @type msg_to: string
    @param msg_to_group: comma separated groupnames.
    @type msg_to_group: string
    @param msg_subject: message subject.
    @type msg_subject: string
    @param msg_body: message body.
    @type msg_body: string
    @param ln: language.
    @type ln: string
    @return: body with warnings.
    """
    warnings = []
    body = ""
    _ = gettext_set_language(ln)
    msg_from_nickname = ""
    msg_id = 0
    if (msg_reply_id):
        # Replying: prefill recipient/subject/body from the original message.
        if (db.check_user_owns_message(uid, msg_reply_id) == 0):
            # The user doesn't own this message
            try:
                raise InvenioWebMessageError(_('Sorry, this message in not in your mailbox.'))
            except InvenioWebMessageError, exc:
                register_exception()
                body = webmessage_templates.tmpl_error(exc.message, ln)
                return body
        else:
            # dummy == variable name to make pylint and pychecker happy!
            (msg_id,
             msg_from_id, msg_from_nickname,
             dummy, dummy,
             msg_subject, msg_body,
             dummy, dummy, dummy) = db.get_message(uid, msg_reply_id)
            if (msg_id == ""):
                # The message exists in table user_msgMESSAGE
                # but not in table msgMESSAGE => table inconsistency
                try:
                    raise InvenioWebMessageError(_('This message does not exist.'))
                except InvenioWebMessageError, exc:
                    register_exception()
                    body = webmessage_templates.tmpl_error(exc.message, ln)
                    return body
            else:
                # Reply goes back to the sender: prefer the nickname, fall
                # back to the numeric uid.
                msg_to = msg_from_nickname or str(msg_from_id)
    body = webmessage_templates.tmpl_write(msg_to=msg_to,
                                           msg_to_group=msg_to_group,
                                           msg_id=msg_id,
                                           msg_subject=msg_subject,
                                           msg_body=msg_body,
                                           warnings=[],
                                           ln=ln)
    return body
def perform_request_write_with_search(
        uid,
        msg_to_user="",
        msg_to_group="",
        msg_subject="",
        msg_body="",
        msg_send_year=0,
        msg_send_month=0,
        msg_send_day=0,
        names_selected=None,
        search_pattern="",
        results_field=CFG_WEBMESSAGE_RESULTS_FIELD['NONE'],
        add_values=0,
        ln=CFG_SITE_LANG):
    """
    Display a write message page, with prefilled values.

    @param uid: user id (int)
    @param msg_to_user: comma separated usernames (str)
    @param msg_to_group: comma separated groupnames (str)
    @param msg_subject: message subject (str)
    @param msg_body: message body (str)
    @param msg_send_year: year to send this message on (int)
    @param msg_send_month: month to send this message on (int)
    @param msg_send_day: day to send this message on (int)
    @param names_selected: list of user/group names ['str'] ticked in the
        search results box
    @param search_pattern: will search users/groups with this pattern (str)
    @param results_field: which recipient field the search results target,
        one of CFG_WEBMESSAGE_RESULTS_FIELD
    @param add_values: if 1, names_selected is appended to the target field
    @param ln: language
    @return: body with warnings
    """
    # Fixed: names_selected defaulted to a shared mutable [] in the original
    # (classic Python mutable-default pitfall); use None as the sentinel.
    if names_selected is None:
        names_selected = []
    warnings = []
    search_results_list = []

    def cat_names(name1, name2):
        """ name1, name2 => 'name1, name2' """
        return name1 + CFG_WEBMESSAGE_SEPARATOR + " " + name2

    if results_field == CFG_WEBMESSAGE_RESULTS_FIELD['USER']:
        if add_values and len(names_selected):
            # Fold the ticked names into the users field, preserving any
            # names already typed there.
            usernames_to_add = reduce(cat_names, names_selected)
            if msg_to_user:
                msg_to_user = cat_names(msg_to_user, usernames_to_add)
            else:
                msg_to_user = usernames_to_add
        # Re-run the search so ticked entries stay visible and checked.
        users_found = db.get_nicknames_like(search_pattern)
        if users_found:
            for user_name in users_found:
                search_results_list.append((user_name[0],
                                            user_name[0] in names_selected))
    elif results_field == CFG_WEBMESSAGE_RESULTS_FIELD['GROUP']:
        if add_values and len(names_selected):
            groupnames_to_add = reduce(cat_names, names_selected)
            if msg_to_group:
                msg_to_group = cat_names(msg_to_group, groupnames_to_add)
            else:
                msg_to_group = groupnames_to_add
        groups_dict = db.get_groupnames_like(uid, search_pattern)
        groups_found = groups_dict.values()
        if groups_found:
            for group_name in groups_found:
                search_results_list.append((group_name,
                                            group_name in names_selected))
    body = webmessage_templates.tmpl_write(
        msg_to=msg_to_user,
        msg_to_group=msg_to_group,
        msg_subject=msg_subject,
        msg_body=msg_body,
        msg_send_year=msg_send_year,
        msg_send_month=msg_send_month,
        msg_send_day=msg_send_day,
        warnings=warnings,
        search_results_list=search_results_list,
        search_pattern=search_pattern,
        results_field=results_field,
        ln=ln)
    return body
def perform_request_send(uid,
                         msg_to_user="",
                         msg_to_group="",
                         msg_subject="",
                         msg_body="",
                         msg_send_year=0,
                         msg_send_month=0,
                         msg_send_day=0,
                         ln=CFG_SITE_LANG,
                         use_email_address = 0):
    """
    send a message. if unable return warnings to write page
    @param uid: id of user from (int)
    @param msg_to_user: comma separated usernames (recipients) (str)
    @param msg_to_group: comma separated groupnames (recipeints) (str)
    @param msg_subject: subject of message (str)
    @param msg_body: body of message (str)
    @param msg_send_year: send this message on year x (int)
    @param msg_send_month: send this message on month y (int)
    @param msg_send_day: send this message on day z (int)
    @param ln: language
    @param use_email_address: if 1, msg_to_user holds email addresses
        instead of nicknames (int)
    @return: (body with warnings, title, navtrail)
    """
    _ = gettext_set_language(ln)

    def strip_spaces(text):
        """suppress spaces before and after x (str)"""
        return text.strip()

    # wash user input
    users_to = map(strip_spaces, msg_to_user.split(CFG_WEBMESSAGE_SEPARATOR))
    groups_to = map(strip_spaces, msg_to_group.split(CFG_WEBMESSAGE_SEPARATOR))
    # split() on an empty string yields [''] -> treat as "no recipients"
    if users_to == ['']:
        users_to = []
    if groups_to == ['']:
        groups_to = []
    warnings = []
    infos = []
    problem = None
    users_to_str = CFG_WEBMESSAGE_SEPARATOR.join(users_to)
    groups_to_str = CFG_WEBMESSAGE_SEPARATOR.join(groups_to)
    send_on_date = get_datetext(msg_send_year, msg_send_month, msg_send_day)
    # A zero date means "send now"; any other date makes it a reminder.
    if (msg_send_year == msg_send_month == msg_send_day == 0):
        status = CFG_WEBMESSAGE_STATUS_CODE['NEW']
    else:
        status = CFG_WEBMESSAGE_STATUS_CODE['REMINDER']
        if send_on_date == datetext_default:
            # get_datetext() falls back to datetext_default on invalid dates
            warning = \
                _("The chosen date (%(x_year)i/%(x_month)i/%(x_day)i) is invalid.")
            warning = warning % {'x_year': msg_send_year,
                                 'x_month': msg_send_month,
                                 'x_day': msg_send_day}
            warnings.append(warning)
            problem = True
    if not(users_to_str or groups_to_str):
        # <=> not(users_to_str) AND not(groups_to_str)
        warnings.append(_("Please enter a user name or a group name."))
        problem = True
    if len(msg_body) > CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE:
        warnings.append(_("Your message is too long, please edit it. Maximum size allowed is %i characters.") % \
                        (CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE,))
        problem = True
    # Resolve recipient identifiers (nicknames or emails) to uids.
    if use_email_address == 0:
        users_dict = db.get_uids_from_nicks(users_to)
        users_to = users_dict.items() # users_to=[(nick, uid),(nick2, uid2)]
    elif use_email_address == 1:
        users_dict = db.get_uids_from_emails(users_to)
        users_to = users_dict.items() # users_to=[(email, uid),(email2, uid2)]
    groups_dict = db.get_gids_from_groupnames(groups_to)
    groups_to = groups_dict.items()
    gids_to = []
    for (group_name, group_id) in groups_to:
        if not(group_id):
            warnings.append(_("Group %s does not exist.") % \
                            (escape_html(group_name)))
            problem = 1
        else:
            gids_to.append(group_id)
    # Get uids from gids
    uids_from_group = db.get_uids_members_of_groups(gids_to)
    # Add the original uids, and make sure there is no double values.
    tmp_dict = {}
    for uid_receiver in uids_from_group:
        tmp_dict[uid_receiver] = None
    for (user_nick, user_id) in users_to:
        if user_id:
            if user_id not in tmp_dict:
                uids_from_group.append(user_id)
                tmp_dict[user_id] = None
        else:
            # Unresolved nickname: it may actually be a raw uid typed in.
            if type(user_nick) == int or \
               type(user_nick) == str and user_nick.isdigit():
                user_nick = int(user_nick)
                if db.user_exists(user_nick) and user_nick not in tmp_dict:
                    uids_from_group.append(user_nick)
                    tmp_dict[user_nick] = None
            else:
                warnings.append(_("User %s does not exist.")% \
                                (escape_html(user_nick)))
                problem = True
    if problem:
        # Validation failed: re-display the write form with the warnings.
        body = webmessage_templates.tmpl_write(msg_to=users_to_str,
                                               msg_to_group=groups_to_str,
                                               msg_subject=msg_subject,
                                               msg_body=msg_body,
                                               msg_send_year=msg_send_year,
                                               msg_send_month=msg_send_month,
                                               msg_send_day=msg_send_day,
                                               warnings=warnings,
                                               ln=ln)
        title = _("Write a message")
        navtrail = get_navtrail(ln, title)
        return (body, title, navtrail)
    else:
        msg_id = db.create_message(uid,
                                   users_to_str, groups_to_str,
                                   msg_subject, msg_body,
                                   send_on_date)
        # send_message returns the uids whose quota prevented delivery.
        uid_problem = db.send_message(uids_from_group, msg_id, status)
        if len(uid_problem) > 0:
            usernames_problem_dict = db.get_nicks_from_uids(uid_problem)
            usernames_problem = usernames_problem_dict.values()

            def listing(name1, name2):
                """ name1, name2 => 'name1, name2' """
                return str(name1) + ", " + str(name2)

            warning = _("Your message could not be sent to the following recipients due to their quota:") + " "
            warnings.append(warning + reduce(listing, usernames_problem))
        if len(uids_from_group) != len(uid_problem):
            infos.append(_("Your message has been sent."))
        else:
            # Nobody received it: drop the now-orphaned message row.
            db.check_if_need_to_delete_message_permanently([msg_id])
        body = perform_request_display(uid, warnings,
                                       infos, ln)
        title = _("Your Messages")
        return (body, title, get_navtrail(ln))
def account_new_mail(uid, ln=CFG_SITE_LANG):
    """
    Render the "new mail" summary shown on the myaccount.py page.

    @param uid: user id (int)
    @param ln: language
    @return: html body
    """
    unread_count = db.get_nb_new_messages_for_user(uid)
    readable_count = db.get_nb_readable_messages_for_user(uid)
    return webmessage_templates.tmpl_account_new_mail(unread_count,
                                                      readable_count, ln)
def get_navtrail(ln=CFG_SITE_LANG, title=""):
    """
    Build the navigation trail for the given page title.

    @param ln: language
    @param title: title of the page
    @return: HTML output
    """
    return webmessage_templates.tmpl_navtrail(ln, title)
def is_no_quota_user(uid):
    """
    Return True if the user belongs to any of the no_quota roles.

    @param uid: user id (int)
    @return: bool
    """
    # Resolve the configured role names to role ids once up front.
    # Fixed: dropped the unused local "res = {}" from the original.
    no_quota_role_ids = [acc_get_role_id(role) for role in CFG_WEBMESSAGE_ROLES_WITHOUT_QUOTA]
    user_info = collect_user_info(uid)
    for role_id in no_quota_role_ids:
        if acc_is_user_in_role(user_info, role_id):
            return True
    return False
diff --git a/invenio/legacy/websearch/adminlib.py b/invenio/legacy/websearch/adminlib.py
index a43c3f9ba..8315233e0 100644
--- a/invenio/legacy/websearch/adminlib.py
+++ b/invenio/legacy/websearch/adminlib.py
@@ -1,3535 +1,3535 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=C0301
"""Invenio WebSearch Administrator Interface."""
__revision__ = "$Id$"
import cgi
import random
import time
import sys
from invenio.utils.date import strftime
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
from invenio.config import \
CFG_CACHEDIR, \
CFG_SITE_LANG, \
CFG_SITE_NAME, \
CFG_SITE_URL,\
CFG_WEBCOMMENT_ALLOW_COMMENTS, \
CFG_WEBSEARCH_SHOW_COMMENT_COUNT, \
CFG_WEBCOMMENT_ALLOW_REVIEWS, \
CFG_WEBSEARCH_SHOW_REVIEW_COUNT, \
CFG_BIBRANK_SHOW_CITATION_LINKS, \
CFG_INSPIRE_SITE, \
CFG_CERN_SITE
from invenio.bibrankadminlib import \
write_outcome, \
modify_translations, \
get_def_name, \
get_name, \
get_languages, \
addadminbox, \
tupletotable, \
createhiddenform
from invenio.dbquery import \
run_sql, \
get_table_update_time
from invenio.websearch_external_collections import \
external_collections_dictionary, \
external_collection_sort_engine_by_name, \
external_collection_get_state, \
external_collection_get_update_state_list, \
external_collection_apply_changes
from invenio.websearch_external_collections_utils import \
get_collection_descendants
from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_STATES_NAME
#from invenio.modules.formatter.format_elements import bfe_references
#from invenio.modules.formatter.engine import BibFormatObject
from invenio.bibdocfile import BibRecDocs
from invenio.base.i18n import gettext_set_language
#from invenio.bibrank_citation_searcher import get_cited_by
from invenio.access_control_admin import acc_get_action_id
from invenio.access_control_config import VIEWRESTRCOLL
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.intbitset import intbitset
from invenio.bibrank_citation_searcher import get_cited_by_count
from invenio.legacy.bibrecord import record_get_field_instances
def getnavtrail(previous = ''):
    """Return the admin-area navtrail followed by *previous*."""
    trail = """<a class="navtrail" href="%s/help/admin">Admin Area</a> """ % (CFG_SITE_URL,)
    return trail + previous
def fix_collection_scores():
    """
    Re-calculate and re-normalize de scores of the collection relationship.
    """
    dad_ids = intbitset(run_sql("SELECT id_dad FROM collection_collection"))
    for id_dad in dad_ids:
        sons = run_sql("SELECT id_son FROM collection_collection WHERE id_dad=%s ORDER BY score DESC", (id_dad, ))
        # Re-number scores as 10, 20, 30, ... preserving the existing order.
        score = 10
        for son_row in sons:
            run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s AND id_son=%s", (score, id_dad, son_row[0]))
            score += 10
def perform_modifytranslations(colID, ln, sel_type='', trans=[], confirm=-1, callback='yes'):
    """Modify the translations of a collection
    sel_type - the nametype to modify
    trans - the translations in the same order as the languages from get_languages()"""
    output = ''
    subtitle = ''
    sitelangs = get_languages()
    # A single translation arrives as a bare string from the request.
    if type(trans) is str:
        trans = [trans]
    # confirm == 2: the "Modify" form was submitted -> apply the translations.
    if confirm in ["2", 2] and colID:
        finresult = modify_translations(colID, sitelangs, sel_type, trans, "collection")
    col_dict = dict(get_def_name('', "collection"))
    if colID and col_dict.has_key(int(colID)):
        colID = int(colID)
        subtitle = """<a name="3">3. Modify translations for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a href="%s/help/admin/websearch-admin-guide#3.3">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
        if sel_type == '':
            sel_type = get_col_nametypes()[0][0]
        header = ['Language', 'Translation']
        actions = []
        types = get_col_nametypes()
        # With several name types, offer a selector form first.
        if len(types) > 1:
            text = """
            <span class="adminlabel">Name type</span>
            <select name="sel_type" class="admin_w200">
            """
            for (key, value) in types:
                text += """<option value="%s" %s>%s""" % (key, key == sel_type and 'selected="selected"' or '', value)
                trans_names = get_name(colID, ln, key, "collection")
                if trans_names and trans_names[0][0]:
                    text += ": %s" % trans_names[0][0]
                text += "</option>"
            text += """</select>"""
            output += createhiddenform(action="modifytranslations#3",
                                       text=text,
                                       button="Select",
                                       colID=colID,
                                       ln=ln,
                                       confirm=0)
        # Initial display (confirm -1/0): load current translations from DB.
        if confirm in [-1, "-1", 0, "0"]:
            trans = []
            for (key, value) in sitelangs:
                try:
                    trans_names = get_name(colID, key, sel_type, "collection")
                    trans.append(trans_names[0][0])
                except StandardError, e:
                    # No translation stored for this language yet.
                    trans.append('')
        # One table row per site language: label + editable input.
        for nr in range(0, len(sitelangs)):
            actions.append(["%s" % (sitelangs[nr][1],)])
            actions[-1].append('<input type="text" name="trans" size="30" value="%s"/>' % trans[nr])
        text = tupletotable(header=header, tuple=actions)
        output += createhiddenform(action="modifytranslations#3",
                                   text=text,
                                   button="Modify",
                                   colID=colID,
                                   sel_type=sel_type,
                                   ln=ln,
                                   confirm=2)
        if sel_type and len(trans) and confirm in ["2", 2]:
            output += write_outcome(finresult)
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_modifytranslations", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_modifyrankmethods(colID, ln, func='', rnkID='', confirm=0, callback='yes'):
    """Modify which rank methods is visible to the collection
    func - remove or add rank method
    rnkID - the id of the rank method."""
    output = ""
    subtitle = ""
    col_dict = dict(get_def_name('', "collection"))
    rnk_dict = dict(get_def_name('', "rnkMETHOD"))
    if colID and col_dict.has_key(int(colID)):
        colID = int(colID)
        # func 0 = attach, func 1 = detach; only acted upon when confirmed.
        if func in ["0", 0] and confirm in ["1", 1]:
            finresult = attach_rnk_col(colID, rnkID)
        elif func in ["1", 1] and confirm in ["1", 1]:
            finresult = detach_rnk_col(colID, rnkID)
        subtitle = """<a name="9">9. Modify rank options for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.9">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
        output = """
        <dl>
        <dt>The rank methods enabled for the collection '%s' is:</dt>
        """ % col_dict[colID]
        rnkmethods = get_col_rnk(colID, ln)
        output += """<dd>"""
        if not rnkmethods:
            output += """No rank methods"""
        else:
            for id, name in rnkmethods:
                output += """%s, """ % name
        output += """</dd>
        </dl>
        """
        # "Enable" selector: rank methods not yet attached to the collection.
        rnk_list = get_def_name('', "rnkMETHOD")
        rnk_dict_in_col = dict(get_col_rnk(colID, ln))
        rnk_list = filter(lambda x: not rnk_dict_in_col.has_key(x[0]), rnk_list)
        if rnk_list:
            text = """
            <span class="adminlabel">Enable:</span>
            <select name="rnkID" class="admin_w200">
            <option value="-1">- select rank method -</option>
            """
            for (id, name) in rnk_list:
                text += """<option value="%s" %s>%s</option>""" % (id, (func in ["0", 0] and confirm in ["0", 0] and int(rnkID) == int(id)) and 'selected="selected"' or '' , name)
            text += """</select>"""
            output += createhiddenform(action="modifyrankmethods#9",
                                       text=text,
                                       button="Enable",
                                       colID=colID,
                                       ln=ln,
                                       func=0,
                                       confirm=1)
            if confirm in ["1", 1] and func in ["0", 0] and int(rnkID) != -1:
                output += write_outcome(finresult)
            elif confirm not in ["0", 0] and func in ["0", 0]:
                output += """<b><span class="info">Please select a rank method.</span></b>"""
        # "Disable" selector: rank methods currently attached.
        coll_list = get_col_rnk(colID, ln)
        if coll_list:
            text = """
            <span class="adminlabel">Disable:</span>
            <select name="rnkID" class="admin_w200">
            <option value="-1">- select rank method-</option>
            """
            for (id, name) in coll_list:
                text += """<option value="%s" %s>%s</option>""" % (id, (func in ["1", 1] and confirm in ["0", 0] and int(rnkID) == int(id)) and 'selected="selected"' or '' , name)
            text += """</select>"""
            output += createhiddenform(action="modifyrankmethods#9",
                                       text=text,
                                       button="Disable",
                                       colID=colID,
                                       ln=ln,
                                       func=1,
                                       confirm=1)
            if confirm in ["1", 1] and func in ["1", 1] and int(rnkID) != -1:
                output += write_outcome(finresult)
            elif confirm not in ["0", 0] and func in ["1", 1]:
                output += """<b><span class="info">Please select a rank method.</span></b>"""
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_modifyrankmethods", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_addcollectiontotree(colID, ln, add_dad='', add_son='', rtype='', mtype='', callback='yes', confirm=-1):
    """Form to add a collection to the tree.
    add_dad - the dad to add the collection to
    add_son - the collection to add
    rtype - add it as a regular or virtual
    mtype - add it to the regular or virtual tree."""
    output = ""
    output2 = ""
    subtitle = """Attach collection to tree&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#2.2">?</a>]</small>""" % (CFG_SITE_URL)
    col_dict = dict(get_def_name('', "collection"))
    if confirm not in [-1, "-1"] and not (add_son and add_dad and rtype):
        output2 += """<b><span class="info">All fields must be filled.</span></b><br /><br />
        """
    elif add_son and add_dad and rtype:
        add_son = int(add_son)
        add_dad = int(add_dad)
        if confirm not in [-1, "-1"]:
            if add_son == add_dad:
                output2 += """<b><span class="info">Cannot add a collection as a pointer to itself.</span></b><br /><br />
                """
            elif check_col(add_dad, add_son):
                # check_col() guards against cycles and duplicate links.
                res = add_col_dad_son(add_dad, add_son, rtype)
                output2 += write_outcome(res)
                if res[0] == 1:
                    output2 += """<b><span class="info"><br /> The collection will appear on your website after the next webcoll run. You can either run it manually or wait until bibsched does it for you.</span></b><br /><br />
                    """
            else:
                output2 += """<b><span class="info">Cannot add the collection '%s' as a %s subcollection of '%s' since it will either create a loop, or the association already exists.</span></b><br /><br />
                """ % (col_dict[add_son], (rtype=="r" and 'regular' or 'virtual'), col_dict[add_dad])
            # Reset the form selections after an attempted add.
            add_son = ''
            add_dad = ''
            rtype = ''
    tree = get_col_tree(colID)
    col_list = col_dict.items()
    col_list.sort(compare_on_val)
    output = show_coll_not_in_tree(colID, ln, col_dict)
    text = """
    <span class="adminlabel">Attach collection:</span>
    <select name="add_son" class="admin_w200">
    <option value="">- select collection -</option>
    """
    # A collection cannot be attached below itself.
    for (id, name) in col_list:
        if id != colID:
            text += """<option value="%s" %s>%s</option>""" % (id, str(id)==str(add_son) and 'selected="selected"' or '', name)
    text += """
    </select><br />
    <span class="adminlabel">to parent collection:</span>
    <select name="add_dad" class="admin_w200">
    <option value="">- select parent collection -</option>
    """
    for (id, name) in col_list:
        text += """<option value="%s" %s>%s</option>
        """ % (id, str(id)==add_dad and 'selected="selected"' or '', name)
    text += """</select><br />
    """
    text += """
    <span class="adminlabel">with relationship:</span>
    <select name="rtype" class="admin_w200">
    <option value="">- select relationship -</option>
    <option value="r" %s>Regular (Narrow by...)</option>
    <option value="v" %s>Virtual (Focus on...)</option>
    </select>
    """ % ((rtype=="r" and 'selected="selected"' or ''), (rtype=="v" and 'selected="selected"' or ''))
    output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/addcollectiontotree" % CFG_SITE_URL,
                               text=text,
                               button="Add",
                               colID=colID,
                               ln=ln,
                               confirm=1)
    output += output2
    #output += perform_showtree(colID, ln)
    body = [output]
    if callback:
        return perform_index(colID, ln, mtype="perform_addcollectiontotree", content=addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_addcollection(colID, ln, colNAME='', dbquery='', callback="yes", confirm=-1):
    """form to add a new collection.
    colNAME - the name of the new collection
    dbquery - the dbquery of the new collection"""
    output = ""
    subtitle = """Create new collection&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#2.1">?</a>]</small>""" % (CFG_SITE_URL)
    text = """
    <span class="adminlabel">Default name</span>
    <input class="admin_w200" type="text" name="colNAME" value="%s" /><br />
    """ % colNAME
    output = createhiddenform(action="%s/admin/websearch/websearchadmin.py/addcollection" % CFG_SITE_URL,
                              text=text,
                              colID=colID,
                              ln=ln,
                              button="Add collection",
                              confirm=1)
    if colNAME and confirm in ["1", 1]:
        # res = (success_flag, new_collection_id_or_error)
        res = add_col(colNAME, '')
        output += write_outcome(res)
        if res[0] == 1:
            # Immediately offer attaching the new collection to the tree.
            output += perform_addcollectiontotree(colID=colID, ln=ln, add_son=res[1], callback='')
    elif confirm not in ["-1", -1]:
        output += """<b><span class="info">Please give the collection a name.</span></b>"""
    body = [output]
    if callback:
        return perform_index(colID, ln=ln, mtype="perform_addcollection", content=addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_modifydbquery(colID, ln, dbquery='', callback='yes', confirm=-1):
    """Form to modify the dbquery of the collection.

    colID - the collection id
    dbquery - the dbquery of the collection.
    Returns the admin box (optionally wrapped by perform_editcollection).
    """
    subtitle = ''
    output = ""
    col_dict = dict(get_def_name('', "collection"))
    if colID and col_dict.has_key(int(colID)):
        colID = int(colID)
        subtitle = """<a name="1">1. Modify collection query for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.1">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
        if confirm == -1:
            # Initial display: load the current query from the database.
            # Fixed: pass colID as a bound query parameter instead of
            # %-interpolating it into the SQL string, matching the other
            # run_sql() calls in this module.
            res = run_sql("SELECT dbquery FROM collection WHERE id=%s", (colID, ))
            dbquery = res[0][0]
        if not dbquery:
            dbquery = ''
        # A collection should have either subcollections or a query, not both.
        reg_sons = len(get_col_tree(colID, 'r'))
        vir_sons = len(get_col_tree(colID, 'v'))
        if reg_sons > 1:
            if dbquery:
                output += "Warning: This collection got subcollections, and should because of this not have a collection query, for further explanation, check the WebSearch Guide<br />"
        elif reg_sons <= 1:
            if not dbquery:
                output += "Warning: This collection does not have any subcollections, and should because of this have a collection query, for further explanation, check the WebSearch Guide<br />"
        text = """
        <span class="adminlabel">Query</span>
        <input class="admin_w200" type="text" name="dbquery" value="%s" /><br />
        """ % cgi.escape(dbquery, 1)
        output += createhiddenform(action="modifydbquery",
                                   text=text,
                                   button="Modify",
                                   colID=colID,
                                   ln=ln,
                                   confirm=1)
        if confirm in ["1", 1]:
            res = modify_dbquery(colID, dbquery)
            if res:
                if dbquery == "":
                    text = """<b><span class="info">Query removed for this collection.</span></b>"""
                else:
                    text = """<b><span class="info">Query set for this collection.</span></b>"""
            else:
                text = """<b><span class="info">Sorry, could not change query.</span></b>"""
            output += text
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_modifydbquery", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_modifycollectiontree(colID, ln, move_up='', move_down='', move_from='', move_to='', delete='', rtype='', callback='yes', confirm=0):
    """to modify the collection tree: move a collection up and down, delete a collection, or change the father of the collection.
    colID - the main collection of the tree, the root
    move_up - move this collection up (is not the collection id, but the place in the tree)
    move_up - move this collection down (is not the collection id, but the place in the tree)
    move_from - move this collection from the current positon (is not the collection id, but the place in the tree)
    move_to - move the move_from collection and set this as it's father. (is not the collection id, but the place in the tree)
    delete - delete this collection from the tree (is not the collection id, but the place in the tree)
    rtype - the type of the collection in the tree, regular or virtual"""
    colID = int(colID)
    tree = get_col_tree(colID, rtype)
    col_dict = dict(get_def_name('', "collection"))
    subtitle = """Modify collection tree: %s&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#2.3">?</a>]&nbsp;&nbsp;&nbsp;<a href="%s/admin/websearch/websearchadmin.py/showtree?colID=%s&amp;ln=%s">Printer friendly version</a></small>""" % (col_dict[colID], CFG_SITE_URL, CFG_SITE_URL, colID, ln)
    fin_output = ""
    output = ""
    # Tree-position arguments index into `tree`; any malformed value falls
    # through to the broad StandardError handler below.
    try:
        if move_up:
            move_up = int(move_up)
            # Swap scores with the previous sibling at the same tree level.
            switch = find_last(tree, move_up)
            if switch and switch_col_treescore(tree[move_up], tree[switch]):
                output += """<b><span class="info">Moved the %s collection '%s' up and '%s' down.</span></b><br /><br />
                """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]], col_dict[tree[switch][0]])
            else:
                output += """<b><span class="info">Could not move the %s collection '%s' up and '%s' down.</span></b><br /><br />
                """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]], col_dict[tree[switch][0]])
        elif move_down:
            move_down = int(move_down)
            # Swap scores with the next sibling at the same tree level.
            switch = find_next(tree, move_down)
            if switch and switch_col_treescore(tree[move_down], tree[switch]):
                output += """<b><span class="info">Moved the %s collection '%s' down and '%s' up.</span></b><br /><br />
                """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_down][0]], col_dict[tree[switch][0]])
            else:
                # NOTE(review): this failure branch indexes tree[move_up]
                # although we are in the move_down path; with move_up == ''
                # this raises and is swallowed by the StandardError handler
                # below ("An error occured") — looks like a latent bug, kept
                # as-is. TODO confirm and fix upstream.
                output += """<b><span class="info">Could not move the %s collection '%s' up and '%s' down.</span></b><br /><br />
                """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_up][0]],col_dict[tree[switch][0]])
        elif delete:
            delete = int(delete)
            if confirm in [0, "0"]:
                # First pass: show a confirmation form before detaching.
                if col_dict[tree[delete][0]] != col_dict[tree[delete][3]]:
                    text = """<b>Do you want to remove the %s collection '%s' and its subcollections in the %s collection '%s'.</b>
                    """ % ((tree[delete][4]=="r" and 'regular' or 'virtual'), col_dict[tree[delete][0]], (rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]])
                else:
                    text = """<b>Do you want to remove all subcollections of the %s collection '%s'.</b>
                    """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]])
                output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifycollectiontree#tree" % CFG_SITE_URL,
                                           text=text,
                                           button="Confirm",
                                           colID=colID,
                                           delete=delete,
                                           rtype=rtype,
                                           ln=ln,
                                           confirm=1)
                output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/index?mtype=perform_modifycollectiontree#tree" % CFG_SITE_URL,
                                           text="<b>To cancel</b>",
                                           button="Cancel",
                                           colID=colID,
                                           ln=ln)
            else:
                # Confirmed: detach the son from its dad in the tree.
                if remove_col_subcol(tree[delete][0], tree[delete][3], rtype):
                    if col_dict[tree[delete][0]] != col_dict[tree[delete][3]]:
                        output += """<b><span class="info">Removed the %s collection '%s' and its subcollections in subdirectory '%s'.</span></b><br /><br />
                        """ % ((tree[delete][4]=="r" and 'regular' or 'virtual'), col_dict[tree[delete][0]], col_dict[tree[delete][3]])
                    else:
                        output += """<b><span class="info">Removed the subcollections of the %s collection '%s'.</span></b><br /><br />
                        """ % ((rtype=="r" and 'regular' or 'virtual'), col_dict[tree[delete][3]])
                else:
                    output += """<b><span class="info">Could not remove the collection from the tree.</span></b><br /><br />
                    """
                delete = ''
        elif move_from and not move_to:
            # First click: source chosen, wait for the destination click.
            # move_from/move_to are "<rtype letter><tree index>" strings.
            move_from_rtype = move_from[0]
            move_from_id = int(move_from[1:len(move_from)])
            text = """<b>Select collection to place the %s collection '%s' under.</b><br /><br />
            """ % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree[move_from_id][0]])
            output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/index?mtype=perform_modifycollectiontree#tree" % CFG_SITE_URL,
                                       text=text,
                                       button="Cancel",
                                       colID=colID,
                                       ln=ln)
        elif move_from and move_to:
            move_from_rtype = move_from[0]
            move_from_id = int(move_from[1:len(move_from)])
            move_to_rtype = move_to[0]
            move_to_id = int(move_to[1:len(move_to)])
            tree_from = get_col_tree(colID, move_from_rtype)
            tree_to = get_col_tree(colID, move_to_rtype)
            if confirm in [0, '0']:
                if move_from_id == move_to_id and move_from_rtype == move_to_rtype:
                    output += """<b><span class="info">Cannot move to itself.</span></b><br /><br />
                    """
                elif tree_from[move_from_id][3] == tree_to[move_to_id][0] and move_from_rtype==move_to_rtype:
                    output += """<b><span class="info">The collection is already there.</span></b><br /><br />
                    """
                elif check_col(tree_to[move_to_id][0], tree_from[move_from_id][0]) or (tree_to[move_to_id][0] == 1 and tree_from[move_from_id][3] == tree_to[move_to_id][0] and move_from_rtype != move_to_rtype):
                    text = """<b>Move %s collection '%s' to the %s collection '%s'.</b>
                    """ % ((tree_from[move_from_id][4]=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (tree_to[move_to_id][4]=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]])
                    output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifycollectiontree#tree" % CFG_SITE_URL,
                                               text=text,
                                               button="Confirm",
                                               colID=colID,
                                               move_from=move_from,
                                               move_to=move_to,
                                               ln=ln,
                                               rtype=rtype,
                                               confirm=1)
                    output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/index?mtype=perform_modifycollectiontree#tree" % CFG_SITE_URL,
                                               text="""<b>To cancel</b>""",
                                               button="Cancel",
                                               colID=colID,
                                               ln=ln)
                else:
                    output += """<b><span class="info">Cannot move the collection '%s' and set it as a subcollection of '%s' since it will create a loop.</span></b><br /><br />
                    """ % (col_dict[tree_from[move_from_id][0]], col_dict[tree_to[move_to_id][0]])
            else:
                # Confirmed: moving to the root (index 0) needs the target
                # rtype passed explicitly.
                if (move_to_id != 0 and move_col_tree(tree_from[move_from_id], tree_to[move_to_id])) or (move_to_id == 0 and move_col_tree(tree_from[move_from_id], tree_to[move_to_id], move_to_rtype)):
                    output += """<b><span class="info">Moved %s collection '%s' to the %s collection '%s'.</span></b><br /><br />
                    """ % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (move_to_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]])
                else:
                    output += """<b><span class="info">Could not move %s collection '%s' to the %s collection '%s'.</span></b><br /><br />
                    """ % ((move_from_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_from[move_from_id][0]], (move_to_rtype=="r" and 'regular' or 'virtual'), col_dict[tree_to[move_to_id][0]])
                move_from = ''
                move_to = ''
        else:
            output += """
            """
    except StandardError, e:
        register_exception()
        return """<b><span class="info">An error occured.</span></b>
        """
    # Render both trees (regular and virtual) side by side, with the
    # pending move_from/move_to selection highlighted.
    output += """<table border ="0" width="100%">
    <tr><td width="50%">
    <b>Narrow by collection:</b>
    </td><td width="50%">
    <b>Focus on...:</b>
    </td></tr><tr><td valign="top">
    """
    tree = get_col_tree(colID, 'r')
    output += create_colltree(tree, col_dict, colID, ln, move_from, move_to, 'r', "yes")
    output += """</td><td valign="top">
    """
    tree = get_col_tree(colID, 'v')
    output += create_colltree(tree, col_dict, colID, ln, move_from, move_to, 'v', "yes")
    output += """</td>
    </tr>
    </table>
    """
    body = [output]
    if callback:
        return perform_index(colID, ln, mtype="perform_modifycollectiontree", content=addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_showtree(colID, ln):
    """Render the collection tree/hierarchy for collection colID.

    Shows two columns: the regular ('r') tree on the left and the
    virtual ('v') tree on the right, wrapped in an admin box.
    """
    name_map = dict(get_def_name('', "collection"))
    subtitle = "Collection tree: %s" % name_map[int(colID)]
    parts = []
    parts.append("""<table border ="0" width="100%">
<tr><td width="50%">
<b>Narrow by collection:</b>
</td><td width="50%">
<b>Focus on...:</b>
</td></tr><tr><td valign="top">
""")
    # Left column: regular ('r') collection tree.
    parts.append(create_colltree(get_col_tree(colID, 'r'), name_map, colID, ln, '', '', 'r', ''))
    parts.append("""</td><td valign="top">
""")
    # Right column: virtual ('v') collection tree.
    parts.append(create_colltree(get_col_tree(colID, 'v'), name_map, colID, ln, '', '', 'v', ''))
    parts.append("""</td>
</tr>
</table>
""")
    return addadminbox(subtitle, ["".join(parts)])
def perform_addportalbox(colID, ln, title='', body='', callback='yes', confirm=-1):
    """Form to create a new portalbox.

    colID - the collection currently being edited (used for links back)
    ln - interface language
    title - the title of the portalbox
    body - the body of the portalbox
    confirm - 1 when the form was submitted, -1 on first display
    """
    col_dict = dict(get_def_name('', "collection"))
    colID = int(colID)
    subtitle = """<a name="5.1"></a>Create new portalbox"""
    text = """
<span class="adminlabel">Title</span>
<textarea cols="50" rows="1" class="admin_wvar" type="text" name="title">%s</textarea><br />
<span class="adminlabel">Body</span>
<textarea cols="50" rows="10" class="admin_wvar" type="text" name="body">%s</textarea><br />
""" % (cgi.escape(title), cgi.escape(body))
    output = createhiddenform(action="addportalbox#5.1",
                              text=text,
                              button="Add",
                              colID=colID,
                              ln=ln,
                              confirm=1)
    if body and confirm in [1, "1"]:
        res = add_pbx(title, body)
        output += write_outcome(res)
        # res is (status, new_id_or_errmsg): res[0] == 1 signals success and
        # res[1] carries the id of the freshly created portalbox.
        # BUGFIX: this previously tested 'res[1] == 1', which only matched
        # when the new id happened to be 1, so the follow-up link to attach
        # the portalbox was almost never shown.
        if res[0] == 1:
            output += """<b><span class="info"><a href="addexistingportalbox?colID=%s&amp;ln=%s&amp;pbxID=%s#5">Add portalbox to collection</a></span></b>""" % (colID, ln, res[1])
    elif confirm not in [-1, "-1"]:
        output += """<b><span class="info">Body field must be filled.</span></b>
"""
    body = [output]
    return perform_showportalboxes(colID, ln, content=addadminbox(subtitle, body))
def perform_addexistingportalbox(colID, ln, pbxID=-1, score=0, position='', sel_ln='', callback='yes', confirm=-1):
    """form to add an existing portalbox to a collection.
    colID - the collection to add the portalbox to
    pbxID - the portalbox to add
    score - the importance of the portalbox.
    position - the position of the portalbox on the page
    sel_ln - the language of the portalbox"""
    subtitle = """<a name="5.2"></a>Add existing portalbox to collection"""
    output = ""
    colID = int(colID)
    res = get_pbx()                       # all portalboxes: (id, title, body)
    pos = get_pbx_pos()                   # position code -> human-readable name
    lang = dict(get_languages())
    col_dict = dict(get_def_name('', "collection"))
    pbx_dict = dict(map(lambda x: (x[0], x[1]), res))
    col_pbx = get_col_pbx(colID)
    col_pbx = dict(map(lambda x: (x[0], x[5]), col_pbx))
    if len(res) > 0:
        # Build the selection form: portalbox, language and position menus.
        text = """
<span class="adminlabel">Portalbox</span>
<select name="pbxID" class="admin_w200">
<option value="-1">- Select portalbox -</option>
"""
        for (id, t_title, t_body) in res:
            # Show title plus a body snippet, truncated so the pair stays
            # within roughly 40 characters; keep the submitted pbxID selected.
            text += """<option value="%s" %s>%s - %s...</option>\n""" % \
                    (id, id == int(pbxID) and 'selected="selected"' or '',
                     t_title[:40], cgi.escape(t_body[0:40 - min(40, len(t_title))]))
        text += """</select><br />
<span class="adminlabel">Language</span>
<select name="sel_ln" class="admin_w200">
<option value="">- Select language -</option>
"""
        # Languages sorted by language code for a stable menu order.
        listlang = lang.items()
        listlang.sort()
        for (key, name) in listlang:
            text += """<option value="%s" %s>%s</option>
""" % (key, key == sel_ln and 'selected="selected"' or '', name)
        text += """</select><br />
<span class="adminlabel">Position</span>
<select name="position" class="admin_w200">
<option value="">- Select position -</option>
"""
        listpos = pos.items()
        listpos.sort()
        for (key, name) in listpos:
            text += """<option value="%s" %s>%s</option>""" % (key, key==position and 'selected="selected"' or '', name)
        text += "</select>"
        output += createhiddenform(action="addexistingportalbox#5.2",
                                   text=text,
                                   button="Add",
                                   colID=colID,
                                   ln=ln,
                                   confirm=1)
    else:
        output = """No existing portalboxes to add, please create a new one.
"""
    # On a confirmed, fully filled-in submission, attach the portalbox.
    if pbxID > -1 and position and sel_ln and confirm in [1, "1"]:
        pbxID = int(pbxID)
        res = add_col_pbx(colID, pbxID, sel_ln, position, '')
        output += write_outcome(res)
    elif pbxID > -1 and confirm not in [-1, "-1"]:
        output += """<b><span class="info">All fields must be filled.</span></b>
"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_showportalboxes(colID, ln, content=output)
def perform_deleteportalbox(colID, ln, pbxID=-1, callback='yes', confirm=-1):
    """form to delete a portalbox which is not in use.
    colID - the current collection.
    pbxID - the id of the portalbox"""
    subtitle = """<a name="5.3"></a>Delete an unused portalbox"""
    output = ""
    colID = int(colID)
    # Perform the deletion first (when confirmed) so the selection list
    # rebuilt below no longer contains the removed portalbox.
    if pbxID not in [-1, "-1"] and confirm in [1, "1"]:
        ares = get_pbx()
        pbx_dict = dict(map(lambda x: (x[0], x[1]), ares))
        if pbx_dict.has_key(int(pbxID)):
            pname = pbx_dict[int(pbxID)]
            # ares now holds the outcome tuple of the deletion, reported below.
            ares = delete_pbx(int(pbxID))
        else:
            return """<b><span class="info">This portalbox does not exist</span></b>"""
    res = get_pbx()
    col_dict = dict(get_def_name('', "collection"))
    pbx_dict = dict(map(lambda x: (x[0], x[1]), res))
    col_pbx = get_col_pbx()
    col_pbx = dict(map(lambda x: (x[0], x[5]), col_pbx))
    if len(res) > 0:
        text = """
<span class="adminlabel">Portalbox</span>
<select name="pbxID" class="admin_w200">
"""
        text += """<option value="-1">- Select portalbox -"""
        for (id, t_title, t_body) in res:
            # Only portalboxes not attached to any collection are deletable.
            if not col_pbx.has_key(id):
                text += """<option value="%s" %s>%s - %s...""" % (id, id == int(pbxID) and 'selected="selected"' or '', t_title, cgi.escape(t_body[0:10]))
                text += "</option>"
        text += """</select><br />"""
        output += createhiddenform(action="deleteportalbox#5.3",
                                   text=text,
                                   button="Delete",
                                   colID=colID,
                                   ln=ln,
                                   confirm=1)
    if pbxID not in [-1, "-1"]:
        pbxID = int(pbxID)
        if confirm in [1, "1"]:
            # Report the outcome of the delete_pbx() call performed above.
            output += write_outcome(ares)
    elif confirm not in [-1, "-1"]:
        output += """<b><span class="info">Choose a portalbox to delete.</span></b>
"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_showportalboxes(colID, ln, content=output)
def perform_modifyportalbox(colID, ln, pbxID=-1, score='', position='', sel_ln='', title='', body='', callback='yes', confirm=-1):
    """form to modify a portalbox in a collection, or change the portalbox itself.
    colID - the id of the collection.
    pbxID - the portalbox to change
    score - the score of the portalbox connected to colID which should be changed.
    position - the position of the portalbox in collection colID to change.

    Two separate sub-forms are shown: confirm=3 updates the collection-specific
    placement (score/position), confirm=4 updates the portalbox content itself
    (title/body, shared by every collection using it)."""
    subtitle = ""
    output = ""
    colID = int(colID)
    res = get_pbx()
    pos = get_pbx_pos()
    lang = dict(get_languages())
    col_dict = dict(get_def_name('', "collection"))
    pbx_dict = dict(map(lambda x: (x[0], x[1]), res))
    col_pbx = get_col_pbx(colID)
    col_pbx = dict(map(lambda x: (x[0], x[5]), col_pbx))
    if pbxID not in [-1, "-1"]:
        pbxID = int(pbxID)
        subtitle = """<a name="5.4"></a>Modify portalbox '%s' for this collection""" % pbx_dict[pbxID]
        col_pbx = get_col_pbx(colID)
        if not (score and position) and not (body and title):
            # No values submitted: deliberately shadow the score/position/
            # title/body parameters with the current database values for
            # this portalbox, so the forms below show the existing state.
            for (id_pbx, id_collection, tln, score, position, title, body) in col_pbx:
                if id_pbx == pbxID:
                    break
        output += """Collection (presentation) specific values (Changes implies only to this collection.)<br />"""
        text = """
<span class="adminlabel">Position</span>
<select name="position" class="admin_w200">
"""
        listpos = pos.items()
        listpos.sort()
        for (key, name) in listpos:
            text += """<option value="%s" %s>%s""" % (key, key==position and 'selected="selected"' or '', name)
            text += "</option>"
        text += """</select><br />"""
        output += createhiddenform(action="modifyportalbox#5.4",
                                   text=text,
                                   button="Modify",
                                   colID=colID,
                                   pbxID=pbxID,
                                   score=score,
                                   title=title,
                                   body=cgi.escape(body, 1),
                                   sel_ln=sel_ln,
                                   ln=ln,
                                   confirm=3)
        if pbxID > -1 and score and position and confirm in [3, "3"]:
            # confirm=3: update only the placement within this collection.
            pbxID = int(pbxID)
            res = modify_pbx(colID, pbxID, sel_ln, score, position, '', '')
            res2 = get_pbx()
            pbx_dict = dict(map(lambda x: (x[0], x[1]), res2))
            output += write_outcome(res)
        output += """<br />Portalbox (content) specific values (any changes appears everywhere the portalbox is used.)"""
        text = """
<span class="adminlabel">Title</span>
<textarea cols="50" rows="1" class="admin_wvar" type="text" name="title">%s</textarea><br />
""" % cgi.escape(title)
        text += """
<span class="adminlabel">Body</span>
<textarea cols="50" rows="10" class="admin_wvar" type="text" name="body">%s</textarea><br />
""" % cgi.escape(body)
        output += createhiddenform(action="modifyportalbox#5.4",
                                   text=text,
                                   button="Modify",
                                   colID=colID,
                                   pbxID=pbxID,
                                   sel_ln=sel_ln,
                                   score=score,
                                   position=position,
                                   ln=ln,
                                   confirm=4)
        if pbxID > -1 and confirm in [4, "4"]:
            # confirm=4: update the shared portalbox content (title/body).
            pbxID = int(pbxID)
            res = modify_pbx(colID, pbxID, sel_ln, '', '', title, body)
            output += write_outcome(res)
    else:
        output = """No portalbox to modify."""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_showportalboxes(colID, ln, content=output)
def perform_switchpbxscore(colID, id_1, id_2, sel_ln, ln):
    """Switch the score of id_1 and id_2 in collection_portalbox.

    colID - the current collection
    id_1/id_2 - the id's to change the score for.
    sel_ln - the language of the portalbox

    Returns the refreshed portalbox overview with the outcome prepended.
    """
    # Dropped an unused get_pbx() lookup (its result was never read),
    # saving a pointless database query per score swap.
    res = switch_pbx_score(colID, id_1, id_2, sel_ln)
    return perform_showportalboxes(colID, ln, content=write_outcome(res))
def perform_showportalboxes(colID, ln, callback='yes', content='', confirm=-1):
    """show the portalboxes of this collection.
    colID - the portalboxes to show the collection for."""
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    subtitle = """<a name="5">5. Modify portalboxes for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.5">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
    output = ""
    pos = get_pbx_pos()
    output = """<dl>
<dt>Portalbox actions (not related to this collection)</dt>
<dd><a href="addportalbox?colID=%s&amp;ln=%s#5.1">Create new portalbox</a></dd>
<dd><a href="deleteportalbox?colID=%s&amp;ln=%s#5.3">Delete an unused portalbox</a></dd>
<dt>Collection specific actions</dt>
<dd><a href="addexistingportalbox?colID=%s&amp;ln=%s#5.2">Add existing portalbox to collection</a></dd>
</dl>
""" % (colID, ln, colID, ln, colID, ln)
    header = ['Position', 'Language', '', 'Title', 'Actions']
    actions = []
    sitelangs = get_languages()
    lang = dict(sitelangs)
    pos_list = pos.items()
    pos_list.sort()
    if len(get_col_pbx(colID)) > 0:
        # One table section per (language, position) pair, listing the
        # portalboxes in score order with up/down arrows to reorder them.
        for (key, value) in sitelangs:
            for (pos_key, pos_value) in pos_list:
                res = get_col_pbx(colID, key, pos_key)
                i = 0
                for (pbxID, colID_pbx, tln, score, position, title, body) in res:
                    move = """<table cellspacing="1" cellpadding="0" border="0"><tr><td>"""
                    # "Up" arrow swaps scores with the previous row; the first
                    # row gets a spacer instead. The rand parameter defeats
                    # browser caching of the swap URL.
                    if i != 0:
                        move += """<a href="%s/admin/websearch/websearchadmin.py/switchpbxscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;sel_ln=%s&amp;rand=%s#5"><img border="0" src="%s/img/smallup.gif" title="Move portalbox up" alt="up" /></a>""" % (CFG_SITE_URL, colID, ln, pbxID, res[i - 1][0], tln, random.randint(0, 1000), CFG_SITE_URL)
                    else:
                        move += "&nbsp;&nbsp;&nbsp;"
                    move += "</td><td>"
                    i += 1
                    # "Down" arrow swaps with the next row; omitted on the last.
                    if i != len(res):
                        move += """<a href="%s/admin/websearch/websearchadmin.py/switchpbxscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;sel_ln=%s&amp;rand=%s#5"><img border="0" src="%s/img/smalldown.gif" title="Move portalbox down" alt="down" /></a>""" % (CFG_SITE_URL, colID, ln, pbxID, res[i][0], tln, random.randint(0, 1000), CFG_SITE_URL)
                    move += """</td></tr></table>"""
                    # Position/language labels shown only on the group's first row.
                    actions.append(["%s" % (i==1 and pos[position] or ''), "%s" % (i==1 and lang[tln] or ''), move, "%s" % title])
                    for col in [(('Modify', 'modifyportalbox'), ('Remove', 'removeportalbox'),)]:
                        actions[-1].append('<a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;pbxID=%s&amp;sel_ln=%s#5.4">%s</a>' % (CFG_SITE_URL, col[0][1], colID, ln, pbxID, tln, col[0][0]))
                        for (str, function) in col[1:]:
                            actions[-1][-1] += ' / <a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;pbxID=%s&amp;sel_ln=%s#5.5">%s</a>' % (CFG_SITE_URL, function, colID, ln, pbxID, tln, str)
        output += tupletotable(header=header, tuple=actions)
    else:
        output += """No portalboxes exists for this collection"""
    output += content
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_showportalboxes", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_removeportalbox(colID, ln, pbxID='', sel_ln='', callback='yes', confirm=0):
    """Two-step form for detaching a portalbox from a collection.
    colID - the current collection, remove the portalbox from this collection.
    sel_ln - remove the portalbox with this language
    pbxID - remove the portalbox with this id"""
    subtitle = """<a name="5.5"></a>Remove portalbox"""
    collection_names = dict(get_def_name('', "collection"))
    portalbox_titles = dict((row[0], row[1]) for row in get_pbx())
    output = ""
    if colID and pbxID and sel_ln:
        colID = int(colID)
        pbxID = int(pbxID)
        if confirm in ["0", 0]:
            # Step one: ask the admin to confirm the removal.
            question = """Do you want to remove the portalbox '%s' from the collection '%s'.""" % (portalbox_titles[pbxID], collection_names[colID])
            output += createhiddenform(action="removeportalbox#5.5",
                                       text=question,
                                       button="Confirm",
                                       colID=colID,
                                       pbxID=pbxID,
                                       sel_ln=sel_ln,
                                       confirm=1)
        elif confirm in ["1", 1]:
            # Step two: confirmed — detach the portalbox and report the result.
            output += write_outcome(remove_pbx(colID, pbxID, sel_ln))
    return perform_showportalboxes(colID, ln,
                                   content="<br />" + addadminbox(subtitle, [output]))
def perform_switchfmtscore(colID, type, id_1, id_2, ln):
    """Switch the score of id_1 and id_2 in the table type.

    colID - the current collection
    id_1/id_2 - the id's to change the score for.
    type - like "format"

    Returns the refreshed output-format overview with the outcome prepended.
    """
    # Dropped an unused get_def_name('', "format") lookup (its result was
    # never read), saving a pointless database query per score swap.
    res = switch_score(colID, id_1, id_2, type)
    return perform_showoutputformats(colID, ln, content=write_outcome(res))
def perform_switchfldscore(colID, id_1, id_2, fmeth, ln):
    """Switch the score of id_1 and id_2 in collection_field_fieldvalue.

    colID - the current collection
    id_1/id_2 - the id's to change the score for.
    fmeth - which field view to return to: 'soo' (sort options),
            'sew' (search fields) or 'seo' (search options).

    NOTE(review): an unrecognised fmeth falls through and returns None,
    matching the original behaviour.
    """
    # Dropped an unused get_def_name('', "field") lookup (its result was
    # never read), saving a pointless database query per score swap.
    res = switch_fld_score(colID, id_1, id_2)
    output = write_outcome(res)
    if fmeth == "soo":
        return perform_showsortoptions(colID, ln, content=output)
    elif fmeth == "sew":
        return perform_showsearchfields(colID, ln, content=output)
    elif fmeth == "seo":
        return perform_showsearchoptions(colID, ln, content=output)
def perform_switchfldvaluescore(colID, id_1, id_fldvalue_1, id_fldvalue_2, ln):
    """Switch the score of id_fldvalue_1 and id_fldvalue_2 for field id_1
    in collection_field_fieldvalue.

    colID - the current collection
    id_1 - the field the two values belong to
    id_fldvalue_1/id_fldvalue_2 - the fieldvalue ids to swap scores for.
    """
    # Dropped two unused SELECTs that fetched the fieldvalue names into
    # name_1/name_2 but never used them — two wasted queries per swap
    # (and a potential IndexError on a missing id).
    res = switch_fld_value_score(colID, id_1, id_fldvalue_1, id_fldvalue_2)
    output = write_outcome(res)
    return perform_modifyfield(colID, fldID=id_1, ln=ln, content=output)
def perform_addnewfieldvalue(colID, fldID, ln, name='', value='', callback="yes", confirm=-1):
    """form to add a new fieldvalue.
    name - the name of the new fieldvalue
    value - the value of the new fieldvalue
    """
    output = ""
    subtitle = """<a name="7.4"></a>Add new value"""
    text = """
<span class="adminlabel">Display name</span>
<input class="admin_w200" type="text" name="name" value="%s" /><br />
<span class="adminlabel">Search value</span>
<input class="admin_w200" type="text" name="value" value="%s" /><br />
""" % (name, value)
    output = createhiddenform(action="%s/admin/websearch/websearchadmin.py/addnewfieldvalue" % CFG_SITE_URL,
                              text=text,
                              colID=colID,
                              fldID=fldID,
                              ln=ln,
                              button="Add",
                              confirm=1)
    if name and value and confirm in ["1", 1]:
        res = add_fldv(name, value)
        output += write_outcome(res)
        # On success res is (1, new_fieldvalue_id): immediately attach the
        # new value to this field as a search option ('seo').
        if res[0] == 1:
            # res is rebound to the attach outcome; only failures are
            # reported here (success was already covered above).
            res = add_col_fld(colID, fldID, 'seo', res[1])
            if res[0] == 0:
                output += "<br />" + write_outcome(res)
    elif confirm not in ["-1", -1]:
        output += """<b><span class="info">Please fill in name and value.</span></b>
"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_modifyfield(colID, fldID=fldID, ln=ln, content=output)
def perform_modifyfieldvalue(colID, fldID, fldvID, ln, name='', value='', callback="yes", confirm=-1):
    """form to modify a fieldvalue.
    name - the name of the fieldvalue
    value - the value of the fieldvalue

    confirm=1 updates the value, confirm=2 deletes it (with all its
    associations); on first display (confirm=-1) the current name/value
    are loaded from the database.
    """
    if confirm in [-1, "-1"]:
        # First display: prefill the form with the stored name and value.
        res = get_fld_value(fldvID)
        (id, name, value) = res[0]
    output = ""
    subtitle = """<a name="7.4"></a>Modify existing value"""
    output = """<dl>
<dt><b><span class="info">Warning: Modifications done below will also inflict on all places the modified data is used.</span></b></dt>
</dl>"""
    text = """
<span class="adminlabel">Display name</span>
<input class="admin_w200" type="text" name="name" value="%s" /><br />
<span class="adminlabel">Search value</span>
<input class="admin_w200" type="text" name="value" value="%s" /><br />
""" % (name, value)
    output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifyfieldvalue" % CFG_SITE_URL,
                               text=text,
                               colID=colID,
                               fldID=fldID,
                               fldvID=fldvID,
                               ln=ln,
                               button="Update",
                               confirm=1)
    output += createhiddenform(action="%s/admin/websearch/websearchadmin.py/modifyfieldvalue" % CFG_SITE_URL,
                               text="Delete value and all associations",
                               colID=colID,
                               fldID=fldID,
                               fldvID=fldvID,
                               ln=ln,
                               button="Delete",
                               confirm=2)
    if name and value and confirm in ["1", 1]:
        res = update_fldv(fldvID, name, value)
        output += write_outcome(res)
    elif confirm in ["2", 2]:
        res = delete_fldv(fldvID)
        output += write_outcome(res)
    elif confirm not in ["-1", -1]:
        output += """<b><span class="info">Please fill in name and value.</span></b>"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_modifyfield(colID, fldID=fldID, ln=ln, content=output)
def perform_removefield(colID, ln, fldID='', fldvID='', fmeth='', callback='yes', confirm=0):
    """form to remove a field from a collection.
    colID - the current collection, remove the field from this collection.
    fldvID - optional fieldvalue restricting the removal
    fldID - remove the field with this id
    fmeth - 'soo' (sort option), 'sew' (search field) or 'seo' (search option);
            selects both the wording and the page returned to."""
    # Human-readable label for the kind of field being removed.
    if fmeth == "soo":
        field = "sort option"
    elif fmeth == "sew":
        field = "search field"
    elif fmeth == "seo":
        field = "search option"
    else:
        field = "field"
    subtitle = """<a name="6.4"><a name="7.4"><a name="8.4"></a>Remove %s""" % field
    output = ""
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(get_def_name('', "field"))
    res = get_fld_value()
    fldv_dict = dict(map(lambda x: (x[0], x[1]), res))
    if colID and fldID:
        colID = int(colID)
        fldID = int(fldID)
        # fldvID may arrive as '' or the string "None" when no value applies.
        if fldvID and fldvID != "None":
            fldvID = int(fldvID)
        if confirm in ["0", 0]:
            # Step one: ask for confirmation before removing.
            text = """Do you want to remove the %s '%s' %s from the collection '%s'.""" % (field, fld_dict[fldID], (fldvID not in["", "None"] and "with value '%s'" % fldv_dict[fldvID] or ''), col_dict[colID])
            output += createhiddenform(action="removefield#6.5",
                                       text=text,
                                       button="Confirm",
                                       colID=colID,
                                       fldID=fldID,
                                       fldvID=fldvID,
                                       fmeth=fmeth,
                                       confirm=1)
        elif confirm in ["1", 1]:
            # Step two: confirmed — perform the removal and report.
            res = remove_fld(colID, fldID, fldvID)
            output += write_outcome(res)
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    # Return to whichever field overview the admin came from.
    if fmeth == "soo":
        return perform_showsortoptions(colID, ln, content=output)
    elif fmeth == "sew":
        return perform_showsearchfields(colID, ln, content=output)
    elif fmeth == "seo":
        return perform_showsearchoptions(colID, ln, content=output)
def perform_removefieldvalue(colID, ln, fldID='', fldvID='', fmeth='', callback='yes', confirm=0):
    """Two-step form to remove a fieldvalue from a search option.

    colID - the current collection, remove the value from this collection.
    fldID - the field the value is attached to
    fldvID - the fieldvalue to remove
    confirm - 0 shows the confirmation form, 1 performs the removal
    """
    subtitle = """<a name="7.4"></a>Remove value"""
    output = ""
    # Dropped an unused get_def_name('', "collection") lookup (col_dict was
    # never read), saving a pointless database query.
    fld_dict = dict(get_def_name('', "field"))
    res = get_fld_value()
    fldv_dict = dict(map(lambda x: (x[0], x[1]), res))
    if colID and fldID:
        colID = int(colID)
        fldID = int(fldID)
        # fldvID may arrive as '' or the string "None" when no value applies.
        if fldvID and fldvID != "None":
            fldvID = int(fldvID)
        if confirm in ["0", 0]:
            # Step one: ask for confirmation before removing.
            text = """Do you want to remove the value '%s' from the search option '%s'.""" % (fldv_dict[fldvID], fld_dict[fldID])
            output += createhiddenform(action="removefieldvalue#7.4",
                                       text=text,
                                       button="Confirm",
                                       colID=colID,
                                       fldID=fldID,
                                       fldvID=fldvID,
                                       fmeth=fmeth,
                                       confirm=1)
        elif confirm in ["1", 1]:
            # Step two: confirmed — perform the removal and report.
            res = remove_fld(colID, fldID, fldvID)
            output += write_outcome(res)
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_modifyfield(colID, fldID=fldID, ln=ln, content=output)
def perform_rearrangefieldvalue(colID, fldID, ln, callback='yes', confirm=-1):
    """rearrang the fieldvalues alphabetically
    colID - the collection
    fldID - the field to rearrange the fieldvalue for
    """
    subtitle = "Order values alphabetically"
    output = ""
    # Map fieldvalue id -> its row for this field's search options.
    col_fldv = get_col_fld(colID, 'seo', fldID)
    col_fldv = dict(map(lambda x: (x[1], x[0]), col_fldv))
    # All fieldvalues as (id, name); presumably returned ordered by name,
    # which is what makes the renumbering below alphabetical — TODO confirm
    # against get_fld_value().
    fldv_names = get_fld_value()
    fldv_names = map(lambda x: (x[0], x[1]), fldv_names)
    # A None key means the field is attached without a concrete value;
    # in that case there is nothing to (re)order.
    if not col_fldv.has_key(None):
        # Assign descending scores following the (alphabetical) name order,
        # so the highest score goes to the first name.
        vscore = len(col_fldv)
        for (fldvID, name) in fldv_names:
            if col_fldv.has_key(fldvID):
                run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (vscore, colID, fldID, fldvID))
                vscore -= 1
        output += write_outcome((1, ""))
    else:
        output += write_outcome((0, (0, "No values to order")))
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_modifyfield(colID, fldID, ln, content=output)
def perform_rearrangefield(colID, ln, fmeth, callback='yes', confirm=-1):
    """rearrang the fields alphabetically
    colID - the collection
    fmeth - 'soo', 'sew' or 'seo'; selects which field view is reordered
            and which overview page is returned to.
    """
    subtitle = "Order fields alphabetically"
    output = ""
    # Fields currently attached to this collection for the given fmeth.
    col_fld = dict(map(lambda x: (x[0], x[1]), get_col_fld(colID, fmeth)))
    # All field (id, name) pairs; presumably returned ordered by name,
    # which drives the alphabetical renumbering — TODO confirm against
    # get_def_name().
    fld_names = get_def_name('', "field")
    if len(col_fld) > 0:
        # Assign descending scores following the name order, so the
        # highest score goes to the alphabetically first field.
        score = len(col_fld)
        for (fldID, name) in fld_names:
            if col_fld.has_key(fldID):
                run_sql("UPDATE collection_field_fieldvalue SET score=%s WHERE id_collection=%s and id_field=%s", (score, colID, fldID))
                score -= 1
        output += write_outcome((1, ""))
    else:
        output += write_outcome((0, (0, "No fields to order")))
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    # Return to whichever field overview the admin came from.
    if fmeth == "soo":
        return perform_showsortoptions(colID, ln, content=output)
    elif fmeth == "sew":
        return perform_showsearchfields(colID, ln, content=output)
    elif fmeth == "seo":
        return perform_showsearchoptions(colID, ln, content=output)
def perform_addexistingfieldvalue(colID, fldID, fldvID=-1, ln=CFG_SITE_LANG, callback='yes', confirm=-1):
    """form to add an existing fieldvalue to a field.
    colID - the collection
    fldID - the field to add the fieldvalue to
    fldvID - the fieldvalue to add"""
    subtitle = """</a><a name="7.4"></a>Add existing value to search option"""
    output = ""
    # Perform the attachment first (when confirmed) so the form rebuilt
    # below reflects the new state; the outcome (ares) is reported after
    # the form.
    if fldvID not in [-1, "-1"] and confirm in [1, "1"]:
        fldvID = int(fldvID)
        ares = add_col_fld(colID, fldID, 'seo', fldvID)
    colID = int(colID)
    fldID = int(fldID)
    lang = dict(get_languages())
    res = get_def_name('', "field")
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(res)
    col_fld = dict(map(lambda x: (x[0], x[1]), get_col_fld(colID, 'seo')))
    fld_value = get_fld_value()
    fldv_dict = dict(map(lambda x: (x[0], x[1]), fld_value))
    text = """
<span class="adminlabel">Value</span>
<select name="fldvID" class="admin_w200">
<option value="-1">- Select value -</option>
"""
    res = run_sql("SELECT id,name,value FROM fieldvalue ORDER BY name")
    for (id, name, value) in res:
        # Keep the submitted fldvID preselected when redisplaying the form.
        text += """<option value="%s" %s>%s - %s</option>
""" % (id, id == int(fldvID) and 'selected="selected"' or '', name, value)
    text += """</select><br />"""
    output += createhiddenform(action="addexistingfieldvalue#7.4",
                               text=text,
                               button="Add",
                               colID=colID,
                               fldID=fldID,
                               ln=ln,
                               confirm=1)
    if fldvID not in [-1, "-1"] and confirm in [1, "1"]:
        # Report the outcome of the add_col_fld() call performed above.
        output += write_outcome(ares)
    elif confirm in [1, "1"]:
        output += """<b><span class="info">Select a value to add and try again.</span></b>"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_modifyfield(colID, fldID, ln, content=output)
def perform_addexistingfield(colID, ln, fldID=-1, fldvID=-1, fmeth='', callback='yes', confirm=-1):
    """form to add an existing field to a collection.
    colID - the collection to add the field to
    fldID - the field to add
    fldvID - optional fieldvalue to attach together with the field
    fmeth - 'soo', 'sew' or 'seo'; selects the field kind and the
            overview page returned to."""
    subtitle = """<a name="6.2"></a><a name="7.2"></a><a name="8.2"></a>Add existing field to collection"""
    output = ""
    # Perform the attachment first (when confirmed) so the form rebuilt
    # below reflects the new state; the outcome (ares) is reported after
    # the form.
    if fldID not in [-1, "-1"] and confirm in [1, "1"]:
        fldID = int(fldID)
        ares = add_col_fld(colID, fldID, fmeth, fldvID)
    colID = int(colID)
    lang = dict(get_languages())
    res = get_def_name('', "field")
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(res)
    col_fld = dict(map(lambda x: (x[0], x[1]), get_col_fld(colID, fmeth)))
    fld_value = get_fld_value()
    fldv_dict = dict(map(lambda x: (x[0], x[1]), fld_value))
    if fldvID:
        fldvID = int(fldvID)
    text = """
<span class="adminlabel">Field</span>
<select name="fldID" class="admin_w200">
<option value="-1">- Select field -</option>
"""
    for (id, var) in res:
        # Search options ('seo') may be attached several times (with
        # different values); other kinds are listed only if not yet attached.
        if fmeth == 'seo' or (fmeth != 'seo' and not col_fld.has_key(id)):
            text += """<option value="%s" %s>%s</option>
""" % (id, '', fld_dict[id])
    text += """</select><br />"""
    output += createhiddenform(action="addexistingfield#6.2",
                               text=text,
                               button="Add",
                               colID=colID,
                               fmeth=fmeth,
                               ln=ln,
                               confirm=1)
    if fldID not in [-1, "-1"] and confirm in [1, "1"]:
        # Report the outcome of the add_col_fld() call performed above.
        output += write_outcome(ares)
    elif fldID in [-1, "-1"] and confirm not in [-1, "-1"]:
        output += """<b><span class="info">Select a field.</span></b>
"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    # Return to whichever field overview the admin came from.
    if fmeth == "soo":
        return perform_showsortoptions(colID, ln, content=output)
    elif fmeth == "sew":
        return perform_showsearchfields(colID, ln, content=output)
    elif fmeth == "seo":
        return perform_showsearchoptions(colID, ln, content=output)
def perform_showsortoptions(colID, ln, callback='yes', content='', confirm=-1):
    """show the sort fields of this collection.."""
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(get_def_name('', "field"))
    fld_type = get_sort_nametypes()
    subtitle = """<a name="8">8. Modify sort options for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.8">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
    output = """<dl>
<dt>Field actions (not related to this collection)</dt>
<dd>Go to the BibIndex interface to modify the available sort options</dd>
<dt>Collection specific actions
<dd><a href="addexistingfield?colID=%s&amp;ln=%s&amp;fmeth=soo#8.2">Add sort option to collection</a></dd>
<dd><a href="rearrangefield?colID=%s&amp;ln=%s&amp;fmeth=soo#8.2">Order sort options alphabetically</a></dd>
</dl>
""" % (colID, ln, colID, ln)
    header = ['', 'Sort option', 'Actions']
    actions = []
    sitelangs = get_languages()
    lang = dict(sitelangs)
    fld_type_list = fld_type.items()
    if len(get_col_fld(colID, 'soo')) > 0:
        res = get_col_fld(colID, 'soo')
        i = 0
        # One row per sort option, in score order, with up/down arrows that
        # swap scores with the neighbouring row (rand defeats URL caching).
        for (fldID, fldvID, stype, score, score_fieldvalue) in res:
            move = """<table cellspacing="1" cellpadding="0" border="0"><tr><td>"""
            if i != 0:
                move += """<a href="%s/admin/websearch/websearchadmin.py/switchfldscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;fmeth=soo&amp;rand=%s#8"><img border="0" src="%s/img/smallup.gif" title="Move up"></a>""" % (CFG_SITE_URL, colID, ln, fldID, res[i - 1][0], random.randint(0, 1000), CFG_SITE_URL)
            else:
                move += "&nbsp;&nbsp;&nbsp;&nbsp;"
            move += "</td><td>"
            i += 1
            if i != len(res):
                move += """<a href="%s/admin/websearch/websearchadmin.py/switchfldscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;fmeth=soo&amp;rand=%s#8"><img border="0" src="%s/img/smalldown.gif" title="Move down"></a>""" % (CFG_SITE_URL, colID, ln, fldID, res[i][0], random.randint(0, 1000), CFG_SITE_URL)
            move += """</td></tr></table>"""
            actions.append([move, fld_dict[int(fldID)]])
            for col in [(('Remove sort option', 'removefield'),)]:
                actions[-1].append('<a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s&amp;fmeth=soo#8.4">%s</a>' % (CFG_SITE_URL, col[0][1], colID, ln, fldID, col[0][0]))
                for (str, function) in col[1:]:
                    actions[-1][-1] += ' / <a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s&amp;fmeth=soo#8.5">%s</a>' % (CFG_SITE_URL, function, colID, ln, fldID, str)
        output += tupletotable(header=header, tuple=actions)
    else:
        output += """No sort options exists for this collection"""
    output += content
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_showsortoptions", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_showsearchfields(colID, ln, callback='yes', content='', confirm=-1):
    """show the search fields of this collection.."""
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(get_def_name('', "field"))
    fld_type = get_sort_nametypes()
    subtitle = """<a name="6">6. Modify search fields for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.6">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
    output = """<dl>
<dt>Field actions (not related to this collection)</dt>
<dd>Go to the BibIndex interface to modify the available search fields</dd>
<dt>Collection specific actions
<dd><a href="addexistingfield?colID=%s&amp;ln=%s&amp;fmeth=sew#6.2">Add search field to collection</a></dd>
<dd><a href="rearrangefield?colID=%s&amp;ln=%s&amp;fmeth=sew#6.2">Order search fields alphabetically</a></dd>
</dl>
""" % (colID, ln, colID, ln)
    header = ['', 'Search field', 'Actions']
    actions = []
    sitelangs = get_languages()
    lang = dict(sitelangs)
    fld_type_list = fld_type.items()
    if len(get_col_fld(colID, 'sew')) > 0:
        res = get_col_fld(colID, 'sew')
        i = 0
        # One row per search field, in score order, with up/down arrows that
        # swap scores with the neighbouring row (rand defeats URL caching).
        for (fldID, fldvID, stype, score, score_fieldvalue) in res:
            move = """<table cellspacing="1" cellpadding="0" border="0"><tr><td>"""
            if i != 0:
                move += """<a href="%s/admin/websearch/websearchadmin.py/switchfldscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;fmeth=sew&amp;rand=%s#6"><img border="0" src="%s/img/smallup.gif" title="Move up"></a>""" % (CFG_SITE_URL, colID, ln, fldID, res[i - 1][0], random.randint(0, 1000), CFG_SITE_URL)
            else:
                move += "&nbsp;&nbsp;&nbsp;"
            move += "</td><td>"
            i += 1
            if i != len(res):
                move += '<a href="%s/admin/websearch/websearchadmin.py/switchfldscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;fmeth=sew&amp;rand=%s#6"><img border="0" src="%s/img/smalldown.gif" title="Move down"></a>' % (CFG_SITE_URL, colID, ln, fldID, res[i][0], random.randint(0, 1000), CFG_SITE_URL)
            move += """</td></tr></table>"""
            actions.append([move, fld_dict[int(fldID)]])
            for col in [(('Remove search field', 'removefield'),)]:
                actions[-1].append('<a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s&amp;fmeth=sew#6.4">%s</a>' % (CFG_SITE_URL, col[0][1], colID, ln, fldID, col[0][0]))
                for (str, function) in col[1:]:
                    actions[-1][-1] += ' / <a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s#6.5">%s</a>' % (CFG_SITE_URL, function, colID, ln, fldID, str)
        output += tupletotable(header=header, tuple=actions)
    else:
        output += """No search fields exists for this collection"""
    output += content
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_showsearchfields", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_showsearchoptions(colID, ln, callback='yes', content='', confirm=-1):
    """Show the search options ('seo' fields) attached to a collection.

    colID - id of the collection whose search options should be listed
    ln - language code used when building the admin links
    callback - if truthy, wrap the box inside the full edit-collection page
    content - extra HTML appended below the table (output of a sub-form)
    confirm - unused here, kept for interface compatibility with siblings
    """
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(get_def_name('', "field"))
    subtitle = """<a name="7">7. Modify search options for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.7">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
    output = """<dl>
    <dt>Field actions (not related to this collection)</dt>
    <dd>Go to the BibIndex interface to modify the available search options</dd>
    <dt>Collection specific actions
    <dd><a href="addexistingfield?colID=%s&amp;ln=%s&amp;fmeth=seo#7.2">Add search option to collection</a></dd>
    <dd><a href="rearrangefield?colID=%s&amp;ln=%s&amp;fmeth=seo#7.2">Order search options alphabetically</a></dd>
    </dl>
    """ % (colID, ln, colID, ln)
    header = ['', 'Search option', 'Actions']
    actions = []
    # distinct fields used as search options for this collection, highest score first
    fld_distinct = run_sql("SELECT distinct(id_field) FROM collection_field_fieldvalue WHERE type='seo' AND id_collection=%s ORDER by score desc", (colID, ))
    if len(fld_distinct) > 0:
        i = 0
        for row in fld_distinct:
            fldID = row[0]
            move = ""
            # 'move up' arrow for every row but the first
            if i != 0:
                move += """<a href="%s/admin/websearch/websearchadmin.py/switchfldscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;fmeth=seo&amp;rand=%s#7"><img border="0" src="%s/img/smallup.gif" title="Move up"></a>""" % (CFG_SITE_URL, colID, ln, fldID, fld_distinct[i - 1][0], random.randint(0, 1000), CFG_SITE_URL)
            else:
                move += "&nbsp;&nbsp;&nbsp;"
            i += 1
            # 'move down' arrow for every row but the last
            if i != len(fld_distinct):
                move += '<a href="%s/admin/websearch/websearchadmin.py/switchfldscore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_2=%s&amp;fmeth=seo&amp;rand=%s#7"><img border="0" src="%s/img/smalldown.gif" title="Move down"></a>' % (CFG_SITE_URL, colID, ln, fldID, fld_distinct[i][0], random.randint(0, 1000), CFG_SITE_URL)
            actions.append([move, "%s" % fld_dict[fldID]])
            # per-row action links: the first opens the cell, the rest are appended with ' / '
            links = (('Modify values', 'modifyfield'), ('Remove search option', 'removefield'),)
            actions[-1].append('<a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s#7.3">%s</a>' % (CFG_SITE_URL, links[0][1], colID, ln, fldID, links[0][0]))
            for (label, function) in links[1:]:
                actions[-1][-1] += ' / <a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s&amp;fmeth=seo#7.3">%s</a>' % (CFG_SITE_URL, function, colID, ln, fldID, label)
        output += tupletotable(header=header, tuple=actions)
    else:
        output += """No search options exists for this collection"""
    output += content
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_showsearchoptions", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_modifyfield(colID, fldID, fldvID='', ln=CFG_SITE_LANG, content='', callback='yes', confirm=0):
    """Modify the fieldvalues for a field.

    Renders the list of values attached to search option `fldID` of
    collection `colID`, with move-up/move-down arrows and per-value
    modify/remove links, then embeds the result into the search-options
    page via perform_showsearchoptions().

    colID - id of the collection the search option belongs to
    fldID - id of the field whose values are edited
    fldvID - initially selected value id (unused here; loop rebinds it)
    content - extra HTML appended after the table (sub-form outcome)
    """
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    fld_dict = dict(get_def_name('', "field"))
    fld_type = get_sort_nametypes()
    fldID = int(fldID)
    subtitle = """<a name="7.3">Modify values for field '%s'</a>""" % (fld_dict[fldID])
    output = """<dl>
    <dt>Value specific actions
    <dd><a href="addexistingfieldvalue?colID=%s&amp;ln=%s&amp;fldID=%s#7.4">Add existing value to search option</a></dd>
    <dd><a href="addnewfieldvalue?colID=%s&amp;ln=%s&amp;fldID=%s#7.4">Add new value to search option</a></dd>
    <dd><a href="rearrangefieldvalue?colID=%s&amp;ln=%s&amp;fldID=%s#7.4">Order values alphabetically</a></dd>
    </dl>
    """ % (colID, ln, fldID, colID, ln, fldID, colID, ln, fldID)
    header = ['', 'Value name', 'Actions']
    actions = []
    sitelangs = get_languages()
    lang = dict(sitelangs)
    fld_type_list = fld_type.items()
    # rows: (fldID, fldvID, type, score, score_fieldvalue) for this search option
    col_fld = list(get_col_fld(colID, 'seo', fldID))
    if len(col_fld) == 1 and col_fld[0][1] is None:
        # a single row with a NULL value id means the option exists but has no values
        output += """<b><span class="info">No values added for this search option yet</span></b>"""
    else:
        j = 0
        # NOTE: the loop rebinds fldID and fldvID to the current row's ids
        for (fldID, fldvID, stype, score, score_fieldvalue) in col_fld:
            fieldvalue = get_fld_value(fldvID)
            move = ""
            # 'move up' arrow for every row but the first
            if j != 0:
                move += """<a href="%s/admin/websearch/websearchadmin.py/switchfldvaluescore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_fldvalue_1=%s&amp;id_fldvalue_2=%s&amp;rand=%s#7.3"><img border="0" src="%s/img/smallup.gif" title="Move up"></a>""" % (CFG_SITE_URL, colID, ln, fldID, fldvID, col_fld[j - 1][1], random.randint(0, 1000), CFG_SITE_URL)
            else:
                move += "&nbsp;&nbsp;&nbsp;"
            j += 1
            # 'move down' arrow for every row but the last
            if j != len(col_fld):
                move += """<a href="%s/admin/websearch/websearchadmin.py/switchfldvaluescore?colID=%s&amp;ln=%s&amp;id_1=%s&amp;id_fldvalue_1=%s&amp;id_fldvalue_2=%s&amp;rand=%s#7.3"><img border="0" src="%s/img/smalldown.gif" title="Move down"></a>""" % (CFG_SITE_URL, colID, ln, fldID, fldvID, col_fld[j][1], random.randint(0, 1000), CFG_SITE_URL)
            # show "name - value" when they differ, else just the name;
            # rows with a NULL value id add no table row at all
            if fieldvalue[0][1] != fieldvalue[0][2] and fldvID is not None:
                actions.append([move, "%s - %s" % (fieldvalue[0][1], fieldvalue[0][2])])
            elif fldvID is not None:
                actions.append([move, "%s" % fieldvalue[0][1]])
            move = ''
            for col in [(('Modify value', 'modifyfieldvalue'), ('Remove value', 'removefieldvalue'),)]:
                # first link opens the actions cell, further links are appended with ' / '
                actions[-1].append('<a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s&amp;fldvID=%s&amp;fmeth=seo#7.4">%s</a>' % (CFG_SITE_URL, col[0][1], colID, ln, fldID, fldvID, col[0][0]))
                for (str, function) in col[1:]:
                    actions[-1][-1] += ' / <a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fldID=%s&amp;fldvID=%s#7.4">%s</a>' % (CFG_SITE_URL, function, colID, ln, fldID, fldvID, str)
        output += tupletotable(header=header, tuple=actions)
    output += content
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    # with no rows at all, show only the caller-provided content
    if len(col_fld) == 0:
        output = content
    return perform_showsearchoptions(colID, ln, content=output)
def perform_showoutputformats(colID, ln, callback='yes', content='', confirm=-1):
    """shows the outputformats of the current collection
    colID - the collection id."""
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    subtitle = """<a name="10">10. Modify output formats for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.10">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
    output = """
    <dl>
    <dt>Output format actions (not specific to the chosen collection)
    <dd>Go to the BibFormat interface to modify</dd>
    <dt>Collection specific actions
    <dd><a href="addexistingoutputformat?colID=%s&amp;ln=%s#10.2">Add existing output format to collection</a></dd>
    </dl>
    """ % (colID, ln)
    header = ['', 'Code', 'Output format', 'Actions']
    actions = []
    col_fmt = get_col_fmt(colID)
    fmt_dict = dict(get_def_name('', "format"))
    if len(col_fmt) > 0:
        total = len(col_fmt)
        for idx, (id_format, colID_fld, code, score) in enumerate(col_fmt):
            # build the up/down arrow cell for reordering the format
            arrows = """<table cellspacing="1" cellpadding="0" border="0"><tr><td>"""
            if idx > 0:
                arrows += """<a href="%s/admin/websearch/websearchadmin.py/switchfmtscore?colID=%s&amp;ln=%s&amp;type=format&amp;id_1=%s&amp;id_2=%s&amp;rand=%s#10"><img border="0" src="%s/img/smallup.gif" title="Move format up"></a>""" % (CFG_SITE_URL, colID, ln, id_format, col_fmt[idx - 1][0], random.randint(0, 1000), CFG_SITE_URL)
            else:
                arrows += "&nbsp;&nbsp;&nbsp;"
            arrows += "</td><td>"
            if idx + 1 != total:
                arrows += '<a href="%s/admin/websearch/websearchadmin.py/switchfmtscore?colID=%s&amp;ln=%s&amp;type=format&amp;id_1=%s&amp;id_2=%s&amp;rand=%s#10"><img border="0" src="%s/img/smalldown.gif" title="Move format down"></a>' % (CFG_SITE_URL, colID, ln, id_format, col_fmt[idx + 1][0], random.randint(0, 1000), CFG_SITE_URL)
            arrows += """</td></tr></table>"""
            row = [arrows, code, fmt_dict[int(id_format)]]
            # per-row action links: the first opens the cell, the rest join with ' / '
            links = (('Remove', 'removeoutputformat'),)
            row.append('<a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fmtID=%s#10">%s</a>' % (CFG_SITE_URL, links[0][1], colID, ln, id_format, links[0][0]))
            for (label, function) in links[1:]:
                row[-1] += ' / <a href="%s/admin/websearch/websearchadmin.py/%s?colID=%s&amp;ln=%s&amp;fmtID=%s#10">%s</a>' % (CFG_SITE_URL, function, colID, ln, id_format, label)
            actions.append(row)
        output += tupletotable(header=header, tuple=actions)
    else:
        output += """No output formats exists for this collection"""
    output += content
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_showoutputformats", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def external_collections_build_select(colID, external_collection):
    """Build the HTML <select> of possible states for one external
    collection, pre-selecting the state currently stored for `colID`.

    Engines with a parser offer four states, the others only two.
    """
    max_state = 4 if external_collection.parser else 2
    num_selected = external_collection_get_state(external_collection, colID)
    parts = ['<select name="state" class="admin_w200">']
    for num in range(max_state):
        selected = ' selected' if num == num_selected else ''
        parts.append('<option value="%(num)d"%(selected)s>%(state_name)s</option>'
                     % {'num': num,
                        'selected': selected,
                        'state_name': CFG_EXTERNAL_COLLECTION_STATES_NAME[num]})
    parts.append('</select>\n')
    return ''.join(parts)
def perform_manage_external_collections(colID, ln, callback='yes', content='', confirm=-1):
    """Show the interface to configure external collections to the user."""
    colID = int(colID)
    subtitle = """<a name="11">11. Configuration of related external collections</a>
    &nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.11">?</a>]</small>""" % CFG_SITE_URL
    table_header = ['External collection', 'Mode', 'Apply also to daughter collections?']
    # one table row per engine: name, state selector, recurse checkbox
    table_content = []
    engines = external_collection_sort_engine_by_name(external_collections_dictionary.values())
    for engine in engines:
        name = engine.name
        state_select = external_collections_build_select(colID, engine)
        recurse_box = '<input type=checkbox name="recurse" value="%(collection_name)s">' % {'collection_name': name}
        table_content.append([name, state_select, recurse_box])
    parts = [
        '<form action="update_external_collections" method="POST"><input type="hidden" name="colID" value="%(colID)d">' % {'colID': colID},
        tupletotable(header=table_header, tuple=table_content),
        '<input class="adminbutton" type="submit" value="Modify"/>',
        '</form>',
    ]
    return addadminbox(subtitle, [''.join(parts)])
def perform_update_external_collections(colID, ln, state_list, recurse_list):
    """Apply the external-collection states submitted by the admin form.

    state_list - one state per engine, in the same sorted-by-name order
    as the form that produced it; recurse_list names the engines whose
    state should also be pushed to daughter collections.
    """
    colID = int(colID)
    if not state_list:
        return 'Warning : No state found.<br />' + perform_manage_external_collections(colID, ln)
    engines = external_collection_sort_engine_by_name(external_collections_dictionary.values())
    # the form and this handler must agree on the engine ordering/count
    if len(engines) != len(state_list):
        return 'Warning : Size of state_list different from external_collections!<br />' + perform_manage_external_collections(colID, ln)
    changes = []
    for (engine, state) in zip(engines, state_list):
        state = int(state)
        recurse = recurse_list and engine.name in recurse_list
        oldstate = external_collection_get_state(engine, colID)
        # collect updates only where something actually changes (or must recurse)
        if oldstate != state or recurse:
            changes += external_collection_get_update_state_list(engine, colID, state, recurse)
    external_collection_apply_changes(changes)
    return '<br /><br />' + perform_manage_external_collections(colID, ln)
def perform_showdetailedrecordoptions(colID, ln, callback='yes', content='', confirm=-1):
    """Show the interface to configure detailed record page to the user.

    Renders one checkbox per detailed-record tab (checked when the tab is
    currently visible) plus a 'recurse to subcollections' checkbox, inside
    a form posting to update_detailed_record_options.
    """
    colID = int(colID)
    subtitle = """<a name="12">12. Configuration of detailed record page</a>
    &nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.12">?</a>]</small>""" % CFG_SITE_URL
    output = '''<form action="update_detailed_record_options" method="post">
    <table><tr><td>
    <input type="hidden" name="colID" value="%(colID)d">
    <dl>
    <dt><b>Show tabs:</b></dt>
    <dd>
    ''' % {'colID': colID}
    for (tab_id, tab_info) in get_detailed_page_tabs(colID).iteritems():
        # the comments tab is only offered when commenting or reviewing is enabled
        if tab_id == 'comments' and \
           not CFG_WEBCOMMENT_ALLOW_REVIEWS and \
           not CFG_WEBCOMMENT_ALLOW_COMMENTS:
            continue
        output += '''<input type="checkbox" id="id%(tabid)s" name="tabs" value="%(tabid)s" %(check)s />
        <label for="id%(tabid)s">&nbsp;%(label)s</label><br />
        ''' % {'tabid': tab_id,
               'check': ((tab_info['visible'] and 'checked="checked"') or ''),
               'label': tab_info['label']}
    output += '</dd></dl></td><td>'
    output += '</td></tr></table><input class="adminbutton" type="submit" value="Modify"/>'
    output += '''<input type="checkbox" id="recurse" name="recurse" value="1" />
    <label for="recurse">&nbsp;Also apply to subcollections</label>'''
    output += '</form>'
    return addadminbox(subtitle, [output])
def perform_update_detailed_record_options(colID, ln, tabs, recurse):
    """Update the preferences for the tab to show/hide in the detailed record page.

    colID - id of the collection to update
    ln - language code used when re-rendering the page
    tabs - list of tab ids that should be visible (empty entries ignored)
    recurse - if true, apply the same setting to all descendant collections
    """
    colID = int(colID)
    output = '<b><span class="info">Operation successfully completed.</span></b>'
    # Work on a local copy so the caller's list is not mutated in place;
    # drop empty entries and always keep the mandatory 'metadata' tab.
    tabs = [tab for tab in tabs if tab != ''] + ['metadata']

    def update_settings(colID, tabs, recurse):
        # Store the enabled tabs as a single ';'-separated row per collection.
        run_sql("DELETE FROM collectiondetailedrecordpagetabs WHERE id_collection=%s", (colID, ))
        run_sql("REPLACE INTO collectiondetailedrecordpagetabs" + \
                " SET id_collection=%s, tabs=%s", (colID, ';'.join(tabs)))
        # NOTE(review): each descendant recurses over its own descendants too;
        # if get_collection_descendants already returns the full subtree this
        # repeats work -- confirm before simplifying.
        if recurse:
            for descendant_id in get_collection_descendants(colID):
                update_settings(descendant_id, tabs, recurse)

    update_settings(colID, tabs, recurse)
    return perform_editcollection(colID, ln, "perform_modifytranslations",
                                  '<br /><br />' + output + '<br /><br />' + \
                                  perform_showdetailedrecordoptions(colID, ln))
def perform_addexistingoutputformat(colID, ln, fmtID=-1, callback='yes', confirm=-1):
    """form to add an existing output format to a collection.

    colID - the collection the format should be added to
    fmtID - the format to add
    confirm - 1 when the user has submitted the selection form
    """
    subtitle = """<a name="10.2"></a>Add existing output format to collection"""
    output = ""
    if fmtID not in [-1, "-1"] and confirm in [1, "1"]:
        # a concrete format was chosen and confirmed: attach it now,
        # report the outcome further below (same condition)
        ares = add_col_fmt(colID, fmtID)
    colID = int(colID)
    res = get_def_name('', "format")
    # formats already attached to this collection: id -> code (excluded from the list)
    col_fmt = get_col_fmt(colID)
    col_fmt = dict(map(lambda x: (x[0], x[2]), col_fmt))
    if len(res) > 0:
        text = """
        <span class="adminlabel">Output format</span>
        <select name="fmtID" class="admin_w200">
        <option value="-1">- Select output format -</option>
        """
        for (fmt_id, name) in res:
            if fmt_id not in col_fmt:
                text += """<option value="%s" %s>%s</option>
                """ % (fmt_id, fmt_id == int(fmtID) and 'selected="selected"' or '', name)
        text += """</select><br />
        """
        output += createhiddenform(action="addexistingoutputformat#10.2",
                                   text=text,
                                   button="Add",
                                   colID=colID,
                                   ln=ln,
                                   confirm=1)
    else:
        output = """No existing output formats to add, please create a new one."""
    if fmtID not in [-1, "-1"] and confirm in [1, "1"]:
        output += write_outcome(ares)
    elif fmtID in [-1, "-1"] and confirm not in [-1, "-1"]:
        output += """<b><span class="info">Please select output format.</span></b>"""
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_showoutputformats(colID, ln, content=output)
def perform_deleteoutputformat(colID, ln, fmtID=-1, callback='yes', confirm=-1):
    """form to delete an output format not in use.

    colID - the collection id of the current collection.
    fmtID - the format id to delete.
    confirm - -1 shows the form, 0 asks for confirmation, 1 deletes
    """
    subtitle = """<a name="10.3"></a>Delete an unused output format"""
    output = """
    <dl>
    <dd>Deleting an output format will also delete the translations associated.</dd>
    </dl>
    """
    colID = int(colID)
    if fmtID not in [-1, "-1"] and confirm in [1, "1"]:
        # deletion confirmed: remove the format before re-reading the list,
        # the outcome is reported further below (same condition)
        ares = delete_fmt(int(fmtID))
    res = get_def_name('', "format")
    fmt_dict = dict(res)
    # formats attached to some collection (id -> code); those cannot be deleted
    col_fmt = get_col_fmt()
    col_fmt = dict(map(lambda x: (x[0], x[2]), col_fmt))
    if len(res) > 0:
        text = """
        <span class="adminlabel">Output format</span>
        <select name="fmtID" class="admin_w200">
        """
        text += """<option value="-1">- Select output format -"""
        for (fmt_id, name) in res:
            if fmt_id not in col_fmt:
                text += """<option value="%s" %s>%s""" % (fmt_id, fmt_id == int(fmtID) and 'selected="selected"' or '', name)
            # NOTE(review): the closing tag is emitted even for skipped
            # formats, mirroring the historical markup -- confirm before changing
            text += "</option>"
        text += """</select><br />"""
        output += createhiddenform(action="deleteoutputformat#10.3",
                                   text=text,
                                   button="Delete",
                                   colID=colID,
                                   ln=ln,
                                   confirm=0)
    if fmtID not in [-1, "-1"]:
        fmtID = int(fmtID)
        if confirm in [0, "0"]:
            text = """<b>Do you want to delete the output format '%s'.</b>
            """ % fmt_dict[fmtID]
            output += createhiddenform(action="deleteoutputformat#10.3",
                                       text=text,
                                       button="Confirm",
                                       colID=colID,
                                       fmtID=fmtID,
                                       ln=ln,
                                       confirm=1)
        elif confirm in [1, "1"]:
            output += write_outcome(ares)
    elif confirm not in [-1, "-1"]:
        output += """<b><span class="info">Choose a output format to delete.</span></b>
        """
    body = [output]
    output = "<br />" + addadminbox(subtitle, body)
    return perform_showoutputformats(colID, ln, content=output)
def perform_removeoutputformat(colID, ln, fmtID='', callback='yes', confirm=0):
    """form to remove an output format from a collection.

    colID - the collection id of the current collection.
    fmtID - the format id.
    """
    subtitle = """<a name="10.5"></a>Remove output format"""
    collection_names = dict(get_def_name('', "collection"))
    format_names = dict(get_def_name('', "format"))
    page = ""
    if colID and fmtID:
        colID = int(colID)
        fmtID = int(fmtID)
        if confirm in ["0", 0]:
            # first pass: ask the admin to confirm the removal
            question = """Do you want to remove the output format '%s' from the collection '%s'.""" % (format_names[fmtID], collection_names[colID])
            page += createhiddenform(action="removeoutputformat#10.5",
                                     text=question,
                                     button="Confirm",
                                     colID=colID,
                                     fmtID=fmtID,
                                     confirm=1)
        elif confirm in ["1", 1]:
            # second pass: actually detach the format and report the outcome
            page += write_outcome(remove_fmt(colID, fmtID))
    boxed = "<br />" + addadminbox(subtitle, [page])
    return perform_showoutputformats(colID, ln, content=boxed)
def perform_index(colID=1, ln=CFG_SITE_LANG, mtype='', content='', confirm=0):
    """The index method, calling methods to show the collection tree, create new collections and add collections to tree.

    colID - collection the menu links should point at
    mtype - name of the sub-method whose panel should be shown
            ('perform_showall' shows every panel)
    content - pre-rendered output of that sub-method, if available
    """
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    fin_output = ""
    # ensure the root collection (id 1) exists and carries the site name
    if 1 not in col_dict:
        res = add_col(CFG_SITE_NAME, '')
        if res:
            fin_output += """<b><span class="info">Created root collection.</span></b><br />"""
        else:
            return "Cannot create root collection, please check database."
    if CFG_SITE_NAME != run_sql("SELECT name from collection WHERE id=1")[0][0]:
        res = run_sql("update collection set name=%s where id=1", (CFG_SITE_NAME, ))
        if res:
            fin_output += """<b><span class="info">The name of the root collection has been modified to be the same as the %(sitename)s installation name given prior to installing %(sitename)s.</span><b><br />""" % {'sitename' : CFG_SITE_NAME}
        else:
            return "Error renaming root collection."
    fin_output += """
    <table>
    <tr>
    <td>0.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_showall">Show all</a></small></td>
    <td>1.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_addcollection">Create new collection</a></small></td>
    <td>2.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_addcollectiontotree">Attach collection to tree</a></small></td>
    <td>3.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_modifycollectiontree">Modify collection tree</a></small></td>
    <td>4.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_checkwebcollstatus">Webcoll Status</a></small></td>
    </tr><tr>
    <td>5.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_checkcollectionstatus">Collection Status</a></small></td>
    <td>6.&nbsp;<small><a href="%s/admin/websearch/websearchadmin.py?colID=%s&amp;ln=%s&amp;mtype=perform_checkexternalcollections">Check external collections</a></small></td>
    <td>7.&nbsp;<small><a href="%s/help/admin/websearch-admin-guide?ln=%s">Guide</a></small></td>
    </tr>
    </table>
    """ % (CFG_SITE_URL, colID, ln, CFG_SITE_URL, colID, ln, CFG_SITE_URL, colID, ln, CFG_SITE_URL, colID, ln, CFG_SITE_URL, colID, ln, CFG_SITE_URL, colID, ln, CFG_SITE_URL, colID, ln, CFG_SITE_URL, ln)
    if mtype == "":
        fin_output += """<br /><br /><b><span class="info">To manage the collections, select an item from the menu.</span><b><br />"""
    # for each panel: reuse pre-rendered content when it matches mtype,
    # otherwise render it when explicitly requested or in 'show all' mode
    if mtype == "perform_addcollection" and content:
        fin_output += content
    elif mtype == "perform_addcollection" or mtype == "perform_showall":
        fin_output += perform_addcollection(colID=colID, ln=ln, callback='')
        fin_output += "<br />"
    if mtype == "perform_addcollectiontotree" and content:
        fin_output += content
    elif mtype == "perform_addcollectiontotree" or mtype == "perform_showall":
        fin_output += perform_addcollectiontotree(colID=colID, ln=ln, callback='')
        fin_output += "<br />"
    if mtype == "perform_modifycollectiontree" and content:
        fin_output += content
    elif mtype == "perform_modifycollectiontree" or mtype == "perform_showall":
        fin_output += perform_modifycollectiontree(colID=colID, ln=ln, callback='')
        fin_output += "<br />"
    if mtype == "perform_checkwebcollstatus" and content:
        fin_output += content
    elif mtype == "perform_checkwebcollstatus" or mtype == "perform_showall":
        fin_output += perform_checkwebcollstatus(colID, ln, callback='')
    if mtype == "perform_checkcollectionstatus" and content:
        fin_output += content
    elif mtype == "perform_checkcollectionstatus" or mtype == "perform_showall":
        fin_output += perform_checkcollectionstatus(colID, ln, callback='')
    if mtype == "perform_checkexternalcollections" and content:
        fin_output += content
    elif mtype == "perform_checkexternalcollections" or mtype == "perform_showall":
        fin_output += perform_checkexternalcollections(colID, ln, callback='')
    body = [fin_output]
    return addadminbox('<b>Menu</b>', body)
def show_coll_not_in_tree(colID, ln, col_dict):
    """Returns collections not in tree"""
    # collect every collection id that appears in the tree (as node or parent)
    in_tree = set()
    for (node_id, up, down, dad, reltype) in get_col_tree(colID):
        in_tree.add(node_id)
        in_tree.add(dad)
    all_ids = run_sql("SELECT id from collection")
    # every collection accounted for: nothing to report
    if len(all_ids) == len(in_tree):
        return ""
    output = "These collections are not in the tree, and should be added:<br />"
    for row in all_ids:
        if row[0] not in in_tree:
            output += """<a href="%s/admin/websearch/websearchadmin.py/editcollection?colID=%s&amp;ln=%s" title="Edit collection">%s</a> ,
            """ % (CFG_SITE_URL, row[0], ln, col_dict[row[0]])
    output += "<br /><br />"
    return output
def create_colltree(tree, col_dict, colID, ln, move_from='', move_to='', rtype='', edit=''):
    """Creates the presentation of the collection tree, with the buttons for modifying it.

    tree - the tree to present, from get_tree(); each node is a tuple
           (id_son, up, down, dad, reltype)
    col_dict - the name of the collections in a dictionary
    colID - the collection id to start with
    move_from - if a collection to be moved has been chosen; encoded as a
                one-letter relation type followed by the node index, e.g. 'r3'
    move_to - the collection which should be set as father of move_from
    rtype - the type of the tree, regular or virtual
    edit - if the method should output the edit buttons.
    """
    if move_from:
        # decode "<rtype letter><node index>" and fetch both trees involved
        move_from_rtype = move_from[0]
        move_from_id = int(move_from[1:len(move_from)])
        tree_from = get_col_tree(colID, move_from_rtype)
        tree_to = get_col_tree(colID, rtype)
    # tables counts the currently open nested <table> levels;
    # tstack records (node id, parent id, nesting depth) per emitted node
    tables = 0
    tstack = []
    i = 0
    text = """
    <table border ="0" cellspacing="0" cellpadding="0">"""
    for i in range(0, len(tree)):
        id_son = tree[i][0]
        up = tree[i][1]
        down = tree[i][2]
        dad = tree[i][3]
        reltype = tree[i][4]
        tmove_from = ""
        # scan backwards for this node's parent on the stack; when found,
        # close any tables opened at deeper nesting levels than the parent
        j = i
        while j > 0:
            j = j - 1
            try:
                if tstack[j][1] == dad:
                    table = tstack[j][2]
                    for k in range(0, tables - table):
                        tables = tables - 1
                        text += """</table></td></tr>
                        """
                    break
            except StandardError, e:
                pass
        text += """<tr><td>
        """
        # up == 0 on a non-first node marks the start of a deeper level:
        # open a nested table
        if i > 0 and tree[i][1] == 0:
            tables = tables + 1
            text += """</td><td></td><td></td><td></td><td><table border="0" cellspacing="0" cellpadding="0"><tr><td>
            """
        if i == 0:
            tstack.append((id_son, dad, 1))
        else:
            tstack.append((id_son, dad, tables))
        # 'move up' arrow, only when editing and the node can move up
        if up == 1 and edit:
            text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&amp;ln=%s&amp;move_up=%s&amp;rtype=%s#%s"><img border="0" src="%s/img/smallup.gif" title="Move collection up"></a>""" % (CFG_SITE_URL, colID, ln, i, rtype, tree[i][0], CFG_SITE_URL)
        else:
            text += """&nbsp;"""
        text += "</td><td>"
        # 'move down' arrow, only when editing and the node can move down
        if down == 1 and edit:
            text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&amp;ln=%s&amp;move_down=%s&amp;rtype=%s#%s"><img border="0" src="%s/img/smalldown.gif" title="Move collection down"></a>""" % (CFG_SITE_URL, colID, ln, i, rtype, tree[i][0], CFG_SITE_URL)
        else:
            text += """&nbsp;"""
        text += "</td><td>"
        if edit:
            # when both endpoints of a move are set, temporarily clear
            # move_from so this row renders normally; restored below
            if move_from and move_to:
                tmove_from = move_from
                move_from = ''
            # skip the root row with no pending move, and skip the row that
            # is itself the source of the pending move
            if not (move_from == "" and i == 0) and not (move_from != "" and int(move_from[1:len(move_from)]) == i and rtype == move_from[0]):
                check = "true"
                if move_from:
                    #if tree_from[move_from_id][0] == tree_to[i][0] or not check_col(tree_to[i][0], tree_from[move_from_id][0]):
                    #    check = ''
                    #elif not check_col(tree_to[i][0], tree_from[move_from_id][0]):
                    #    check = ''
                    #if not check and (tree_to[i][0] == 1 and tree_from[move_from_id][3] == tree_to[i][0] and move_from_rtype != rtype):
                    #    check = "true"
                    # a move is pending: offer this row as its destination
                    if check:
                        text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&amp;ln=%s&amp;move_from=%s&amp;move_to=%s%s&amp;rtype=%s#tree"><img border="0" src="%s/img/move_to.gif" title="Move '%s' to '%s'"></a>
                        """ % (CFG_SITE_URL, colID, ln, move_from, rtype, i, rtype, CFG_SITE_URL, col_dict[tree_from[int(move_from[1:len(move_from)])][0]], col_dict[tree_to[i][0]])
                else:
                    # no move pending: offer this row as a move source
                    try:
                        text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&amp;ln=%s&amp;move_from=%s%s&amp;rtype=%s#%s"><img border="0" src="%s/img/move_from.gif" title="Move '%s' from this location."></a>""" % (CFG_SITE_URL, colID, ln, rtype, i, rtype, tree[i][0], CFG_SITE_URL, col_dict[tree[i][0]])
                    except KeyError:
                        pass
            else:
                text += """<img border="0" src="%s/img/white_field.gif">
                """ % CFG_SITE_URL
        else:
            text += """<img border="0" src="%s/img/white_field.gif">
            """ % CFG_SITE_URL
        text += """
        </td>
        <td>"""
        # delete-from-tree button column
        if edit:
            try:
                text += """<a href="%s/admin/websearch/websearchadmin.py/modifycollectiontree?colID=%s&amp;ln=%s&amp;delete=%s&amp;rtype=%s#%s"><img border="0" src="%s/img/iconcross.gif" title="Remove colletion from tree"></a>""" % (CFG_SITE_URL, colID, ln, i, rtype, tree[i][0], CFG_SITE_URL)
            except KeyError:
                pass
        elif i != 0:
            text += """<img border="0" src="%s/img/white_field.gif">
            """ % CFG_SITE_URL
        text += """</td><td>
        """
        # restore the pending move cleared above
        if tmove_from:
            move_from = tmove_from
        # collection name cell: italic for virtual relations, with move
        # source/destination markers when this row is part of a pending move
        try:
            text += """<a name="%s"></a>%s<a href="%s/admin/websearch/websearchadmin.py/editcollection?colID=%s&amp;ln=%s" title="Edit collection">%s</a>%s%s%s""" % (tree[i][0], (reltype=="v" and '<i>' or ''), CFG_SITE_URL, tree[i][0], ln, col_dict[id_son], (move_to=="%s%s" %(rtype, i) and '&nbsp;<img border="0" src="%s/img/move_to.gif">' % CFG_SITE_URL or ''), (move_from=="%s%s" % (rtype, i) and '&nbsp;<img border="0" src="%s/img/move_from.gif">' % CFG_SITE_URL or ''), (reltype=="v" and '</i>' or ''))
        except KeyError:
            pass
        text += """</td></tr>
        """
    # close any tables still open when the tree ends
    while tables > 0:
        text += """</table></td></tr>
        """
        tables = tables - 1
    text += """</table>"""
    return text
def perform_deletecollection(colID, ln, confirm=-1, callback='yes'):
    """form to delete a collection

    colID - id of collection
    ln - language code
    confirm - -1 shows the form, 0 asks for confirmation, 1 deletes
    callback - if truthy, embed the result in the full edit-collection page
    """
    subtitle = ''
    output = """
    <span class="warning">
    <strong>
    <dl>
    <dt>WARNING:</dt>
    <dd>When deleting a collection, you also deletes all data related to the collection like translations, relations to other collections and information about which rank methods to use.
    <br />For more information, please go to the <a title="See guide" href="%s/help/admin/websearch-admin-guide">WebSearch guide</a> and read the section regarding deleting a collection.</dd>
    </dl>
    </strong>
    </span>
    """ % CFG_SITE_URL
    col_dict = dict(get_def_name('', "collection"))
    if colID != 1 and colID and int(colID) in col_dict:
        colID = int(colID)
        subtitle = """<a name="4">4. Delete collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.4">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
        # the collection must be fully detached from the tree before deletion
        res = run_sql("SELECT id_dad,id_son,type,score from collection_collection WHERE id_dad=%s", (colID, ))
        res2 = run_sql("SELECT id_dad,id_son,type,score from collection_collection WHERE id_son=%s", (colID, ))
        if not res and not res2:
            if confirm in ["-1", -1]:
                text = """Do you want to delete this collection."""
                output += createhiddenform(action="deletecollection#4",
                                           text=text,
                                           colID=colID,
                                           button="Delete",
                                           confirm=0)
            elif confirm in ["0", 0]:
                text = """Are you sure you want to delete this collection."""
                output += createhiddenform(action="deletecollection#4",
                                           text=text,
                                           colID=colID,
                                           button="Confirm",
                                           confirm=1)
            elif confirm in ["1", 1]:
                result = delete_col(colID)
                if not result:
                    # carry a message instead of raising a bare Exception
                    raise Exception("Failed to delete collection %s" % colID)
        else:
            output = """<b><span class="info">Can not delete a collection that is a part of the collection tree, remove collection from the tree and try again.</span></b>"""
    else:
        subtitle = """4. Delete collection"""
        output = """<b><span class="info">Not possible to delete the root collection</span></b>"""
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_deletecollection", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_editcollection(colID=1, ln=CFG_SITE_LANG, mtype='', content=''):
    """Render the edit page for one collection.

    Each sub-task method calls back into this method with its own output;
    when no mtype is given, every section is rendered in order.
    colID   - id of the collection being edited
    mtype   - name of the method whose output is passed in 'content'
    content - pre-rendered output from that method
    """
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    if colID not in col_dict:
        return """<b><span class="info">Collection deleted.</span></b>
"""
    fin_output = """
    <table>
    <tr>
    <td><b>Menu</b></td>
    </tr>
    <tr>
    <td>0.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s">Show all</a></small></td>
    <td>1.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_modifydbquery">Modify collection query</a></small></td>
    <td>2.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_modifyrestricted">Modify access restrictions</a></small></td>
    <td>3.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_modifytranslations">Modify translations</a></small></td>
    <td>4.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_deletecollection">Delete collection</a></small></td>
    </tr><tr>
    <td>5.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_showportalboxes">Modify portalboxes</a></small></td>
    <td>6.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_showsearchfields#6">Modify search fields</a></small></td>
    <td>7.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_showsearchoptions#7">Modify search options</a></small></td>
    <td>8.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_showsortoptions#8">Modify sort options</a></small></td>
    <td>9.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_modifyrankmethods#9">Modify rank options</a></small></td>
    </tr><tr>
    <td>10.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_showoutputformats#10">Modify output formats</a></small></td>
    <td>11.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_manage_external_collections#11">Configuration of related external collections</a></small></td>
    <td>12.&nbsp;<small><a href="editcollection?colID=%s&amp;ln=%s&amp;mtype=perform_showdetailedrecordoptions#12">Detailed record page options</a></small></td>
    </tr>
    </table>
    """ % ((colID, ln) * 13)
    # Table-driven dispatch: for each section either embed the content that
    # was handed back to us, or (when showing everything or this exact
    # section) re-run the section method directly.
    sections = (
        ("perform_modifydbquery", perform_modifydbquery),
        ("perform_modifyrestricted", perform_modifyrestricted),
        ("perform_modifytranslations", perform_modifytranslations),
        ("perform_deletecollection", perform_deletecollection),
        ("perform_showportalboxes", perform_showportalboxes),
        ("perform_showsearchfields", perform_showsearchfields),
        ("perform_showsearchoptions", perform_showsearchoptions),
        ("perform_showsortoptions", perform_showsortoptions),
        ("perform_modifyrankmethods", perform_modifyrankmethods),
        ("perform_showoutputformats", perform_showoutputformats),
        ("perform_manage_external_collections", perform_manage_external_collections),
        ("perform_showdetailedrecordoptions", perform_showdetailedrecordoptions),
    )
    for method_name, method in sections:
        if mtype == method_name and content:
            fin_output += content
        elif mtype == method_name or not mtype:
            fin_output += method(colID, ln, callback='')
    return addadminbox("Overview of edit options for collection '%s'" % col_dict[colID], [fin_output])
def perform_checkwebcollstatus(colID, ln, confirm=0, callback='yes'):
    """Check status of the collection tables with respect to the webcoll cache."""
    # Section heading linking to the admin guide.
    subtitle = """<a name="11"></a>Webcoll Status&nbsp;&nbsp;&nbsp;[<a href="%s/help/admin/websearch-admin-guide#5">?</a>]""" % CFG_SITE_URL
    output = ""
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    output += """<br /><b>Last updates:</b><br />"""
    collection_table_update_time = ""
    collection_web_update_time = ""
    # Last modification time of the `collection` DB table.
    collection_table_update_time = get_table_update_time('collection')
    output += "Collection table last updated: %s<br />" % collection_table_update_time
    try:
        # NOTE(review): `file` shadows the builtin, and the bare except
        # below silently ignores a missing or unreadable cache timestamp
        # file (a deliberate best-effort read).
        file = open("%s/collections/last_updated" % CFG_CACHEDIR)
        collection_web_update_time = file.readline().strip()
        output += "Collection cache last updated: %s<br />" % collection_web_update_time
        file.close()
    except:
        pass
    # reformat collection_web_update_time to the format suitable for comparisons
    try:
        # "%Y-%m-%d %H:%M:%S" sorts lexicographically, so the plain string
        # comparison below is a valid chronological comparison.
        collection_web_update_time = strftime("%Y-%m-%d %H:%M:%S",
            time.strptime(collection_web_update_time, "%d %b %Y %H:%M:%S"))
    except ValueError, e:
        pass
    if collection_table_update_time > collection_web_update_time:
        output += """<br /><b><span class="info">Warning: The collections have been modified since last time Webcoll was executed, to process the changes, Webcoll must be executed.</span></b><br />"""
    header = ['ID', 'Name', 'Time', 'Status', 'Progress']
    actions = []
    output += """<br /><b>Last BibSched tasks:</b><br />"""
    # Most recent past webcoll task (last row, since rows are ordered by runtime).
    res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='webcoll' and runtime< now() ORDER by runtime")
    if len(res) > 0:
        (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[len(res) - 1]
        # NOTE(review): `webcoll__update_time` (double underscore) is
        # assigned here and below but never read afterwards.
        webcoll__update_time = runtime
        actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')])
    else:
        actions.append(['', 'webcoll', '', '', 'Not executed yet'])
    # Most recent past bibindex task.
    res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='bibindex' and runtime< now() ORDER by runtime")
    if len(res) > 0:
        (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[len(res) - 1]
        actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')])
    else:
        actions.append(['', 'bibindex', '', '', 'Not executed yet'])
    output += tupletotable(header=header, tuple=actions)
    output += """<br /><b>Next scheduled BibSched run:</b><br />"""
    actions = []
    # Earliest future webcoll task, if any.
    res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='webcoll' and runtime > now() ORDER by runtime")
    webcoll_future = ""
    if len(res) > 0:
        (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[0]
        webcoll__update_time = runtime
        actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')])
        webcoll_future = "yes"
    else:
        actions.append(['', 'webcoll', '', '', 'Not scheduled'])
    # Earliest future bibindex task, if any.
    res = run_sql("select id, proc, host, user, runtime, sleeptime, arguments, status, progress from schTASK where proc='bibindex' and runtime > now() ORDER by runtime")
    bibindex_future = ""
    if len(res) > 0:
        (id, proc, host, user, runtime, sleeptime, arguments, status, progress) = res[0]
        actions.append([id, proc, runtime, (status !="" and status or ''), (progress !="" and progress or '')])
        bibindex_future = "yes"
    else:
        actions.append(['', 'bibindex', '', '', 'Not scheduled'])
    output += tupletotable(header=header, tuple=actions)
    # Warn when no future run is scheduled for either daemon.
    if webcoll_future == "":
        output += """<br /><b><span class="info">Warning: Webcoll is not scheduled for a future run by bibsched, any updates to the collection will not be processed.</span></b><br />"""
    if bibindex_future == "":
        output += """<br /><b><span class="info">Warning: Bibindex is not scheduled for a future run by bibsched, any updates to the records will not be processed.</span></b><br />"""
    body = [output]
    if callback:
        return perform_index(colID, ln, "perform_checkwebcollstatus", addadminbox(subtitle, body))
    else:
        return addadminbox(subtitle, body)
def perform_modifyrestricted(colID, ln, rest='', callback='yes', confirm=-1):
    """Show where collection access restrictions are managed.

    rest - (legacy) apache group name; since Invenio 0.92.1 restrictions
    are handled by WebAccess, so this section only renders a pointer to
    the WebAccess admin interface.
    """
    subtitle = ''
    output = ""
    col_dict = dict(get_def_name('', "collection"))
    action_id = acc_get_action_id(VIEWRESTRCOLL)
    if colID and int(colID) in col_dict:
        colID = int(colID)
        subtitle = """<a name="2">2. Modify access restrictions for collection '%s'</a>&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/websearch-admin-guide#3.2">?</a>]</small>""" % (col_dict[colID], CFG_SITE_URL)
        output = """<p>Please note that Invenio versions greater than <em>0.92.1</em> manage collection restriction via the standard
        <strong><a href="/admin/webaccess/webaccessadmin.py/showactiondetails?id_action=%i">WebAccess Admin Interface</a></strong> (action '%s').</p>
        """ % (action_id, VIEWRESTRCOLL)
    body = [output]
    if callback:
        return perform_editcollection(colID, ln, "perform_modifyrestricted", addadminbox(subtitle, body))
    return addadminbox(subtitle, body)
def perform_checkcollectionstatus(colID, ln, confirm=0, callback='yes'):
    """Check the configuration of the collections.

    Renders one table row per collection with its query, subcollections,
    restriction, hosted and I18N flags, plus a consistency status:
      1:Conflict   - has both a dbquery and regular sons
      2:Empty      - has neither a dbquery nor regular sons
      3:Restricted - the collection is access restricted
    Note: `confirm` and `callback` are accepted for interface
    compatibility but the box is always returned directly — the original
    contained an unreachable callback branch after the return statement,
    which has been removed.
    """
    from invenio.search_engine import collection_restricted_p, restricted_collection_cache
    subtitle = """<a name="11"></a>Collection Status&nbsp;&nbsp;&nbsp;[<a href="%s/help/admin/websearch-admin-guide#6">?</a>]""" % CFG_SITE_URL
    output = ""
    colID = int(colID)
    col_dict = dict(get_def_name('', "collection"))
    collections = run_sql("SELECT id, name, dbquery, nbrecs FROM collection "
                          "ORDER BY id")
    header = ['ID', 'Name','Query', 'Subcollections', 'Restricted', 'Hosted',
              'I18N', 'Status', 'Number of records']
    rnk_list = get_def_name('', "rnkMETHOD")
    actions = []
    # Refresh the restriction cache once, then query it with
    # recreate_cache_if_needed=False inside the loop.
    restricted_collection_cache.recreate_cache_if_needed()
    for (id, name, dbquery, nbrecs) in collections:
        reg_sons = col_has_son(id, 'r')
        vir_sons = col_has_son(id, 'v')
        status = ""
        hosted = ""
        # Hosted collections are marked by a special dbquery prefix.
        if str(dbquery).startswith("hostedcollection:"):
            hosted = """<b><span class="info">Yes</span></b>"""
        else:
            hosted = """<b><span class="info">No</span></b>"""
        langs = run_sql("SELECT ln from collectionname where id_collection=%s", (id, ))
        i8n = ""
        if len(langs) > 0:
            for lang in langs:
                i8n += "%s, " % lang
        else:
            i8n = """<b><span class="info">None</span></b>"""
        if reg_sons and dbquery:
            status = """<b><span class="warning">1:Conflict</span></b>"""
        elif not dbquery and not reg_sons:
            status = """<b><span class="warning">2:Empty</span></b>"""
        if (reg_sons or vir_sons):
            subs = """<b><span class="info">Yes</span></b>"""
        else:
            subs = """<b><span class="info">No</span></b>"""
        if dbquery is None:
            dbquery = """<b><span class="info">No</span></b>"""
        restricted = collection_restricted_p(name, recreate_cache_if_needed=False)
        if restricted:
            restricted = """<b><span class="warning">Yes</span></b>"""
            if status:
                status += """<b><span class="warning">,3:Restricted</span></b>"""
            else:
                status += """<b><span class="warning">3:Restricted</span></b>"""
        else:
            restricted = """<b><span class="info">No</span></b>"""
        if status == "":
            status = """<b><span class="info">OK</span></b>"""
        actions.append([id, """<a href="%s/admin/websearch/websearchadmin.py/editcollection?colID=%s&amp;ln=%s">%s</a>""" % (CFG_SITE_URL, id, ln, name), dbquery, subs, restricted, hosted, i8n, status, nbrecs])
    output += tupletotable(header=header, tuple=actions)
    body = [output]
    return addadminbox(subtitle, body)
def perform_checkexternalcollections(colID, ln, icl=None, update="", confirm=0, callback='yes'):
    """Check the external collections for inconsistencies.

    Compares the external collections defined in the configuration file
    with those registered in the database table "externalcollection",
    reports any difference, and — when update="add"/"del" and icl is
    given — synchronises the database accordingly.
    Fixes vs. original: SQL statements are parameterized instead of
    %-interpolating request data into the query string; an unreachable
    callback branch after the return was removed; message typo fixed.
    """
    subtitle = """<a name="7"></a>Check external collections&nbsp;&nbsp;&nbsp;[<a href="%s/help/admin/websearch-admin-guide#7">?</a>]""" % CFG_SITE_URL
    output = ""
    colID = int(colID)
    if icl:
        if update == "add":
            # icl : the "inconsistent list" comes as a string, it has to be converted back into a list
            # FIXME(security): eval() of a request parameter can execute
            # arbitrary code; this should be ast.literal_eval instead.
            icl = eval(icl)
            for collection in icl:
                results_select = run_sql("SELECT name FROM externalcollection WHERE name like %s", (collection, ))
                if not results_select:
                    run_sql("INSERT INTO externalcollection (name) VALUES (%s)", (collection, ))
                    output += """<br /><span class=info>New collection \"%s\" has been added to the database table \"externalcollection\".</span><br />""" % (collection)
                else:
                    output += """<br /><span class=info>Collection \"%s\" has already been added to the database table \"externalcollection\" or was already there.</span><br />""" % (collection)
        elif update == "del":
            # icl : the "inconsistent list" comes as a string, it has to be converted back into a list
            # FIXME(security): see eval() note above.
            icl = eval(icl)
            for collection in icl:
                results_select = run_sql("SELECT id FROM externalcollection WHERE name like %s", (collection, ))
                if results_select:
                    run_sql("DELETE FROM externalcollection WHERE id like %s", (results_select[0][0], ))
                    run_sql("DELETE FROM collection_externalcollection WHERE id_externalcollection like %s", (results_select[0][0], ))
                    output += """<br /><span class=info>Collection \"%s\" has been deleted from the database table \"externalcollection\".</span><br />""" % (collection)
                else:
                    output += """<br /><span class=info>Collection \"%s\" has already been deleted from the database table \"externalcollection\" or was never there.</span><br />""" % (collection)
    # Collect the configured collection names and the registered ones,
    # both sorted so they can be compared element-wise.
    external_collections_file = []
    external_collections_db = []
    for coll in external_collections_dictionary.values():
        external_collections_file.append(coll.name)
    external_collections_file.sort()
    query = """SELECT name from externalcollection"""
    results = run_sql(query)
    for result in results:
        external_collections_db.append(result[0])
    external_collections_db.sort()
    number_file = len(external_collections_file)
    number_db = len(external_collections_db)
    if external_collections_file == external_collections_db:
        output += """<br /><span class="info">External collections are consistent.</span><br /><br />
                     &nbsp;&nbsp;&nbsp;- database table \"externalcollection\" has %(number_db)s collections<br />
                     &nbsp;&nbsp;&nbsp;- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections""" % {
            "number_db" : number_db,
            "number_file" : number_file}
    elif len(external_collections_file) > len(external_collections_db):
        # Config file has more entries: offer to add the missing ones.
        external_collections_diff = list(set(external_collections_file) - set(external_collections_db))
        external_collections_db.extend(external_collections_diff)
        external_collections_db.sort()
        if external_collections_file == external_collections_db:
            output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
                         &nbsp;&nbsp;&nbsp;- database table \"externalcollection\" has %(number_db)s collections
                         &nbsp;(<span class="warning">missing: %(diff)s</span>)<br />
                         &nbsp;&nbsp;&nbsp;- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
                         <br /><br /><a href="%(site_url)s/admin/websearch/websearchadmin.py/checkexternalcollections?colID=%(colID)s&amp;icl=%(diff)s&amp;update=add&amp;ln=%(ln)s">
                         Click here</a> to update your database adding the missing collections. If the problem persists please check your configuration manually.""" % {
                "number_db" : number_db,
                "number_file" : number_file,
                "diff" : external_collections_diff,
                "site_url" : CFG_SITE_URL,
                "colID" : colID,
                "ln" : ln}
        else:
            output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
                         &nbsp;&nbsp;&nbsp;- database table \"externalcollection\" has %(number_db)s collections<br />
                         &nbsp;&nbsp;&nbsp;- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
                         <br /><br /><span class="warning">The external collections do not match.</span>
                         <br />To fix the problem please check your configuration manually.""" % {
                "number_db" : number_db,
                "number_file" : number_file}
    elif len(external_collections_file) < len(external_collections_db):
        # Database has more entries: offer to remove the extra ones.
        external_collections_diff = list(set(external_collections_db) - set(external_collections_file))
        external_collections_file.extend(external_collections_diff)
        external_collections_file.sort()
        if external_collections_file == external_collections_db:
            output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
                         &nbsp;&nbsp;&nbsp;- database table \"externalcollection\" has %(number_db)s collections
                         &nbsp;(<span class="warning">extra: %(diff)s</span>)<br />
                         &nbsp;&nbsp;&nbsp;- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
                         <br /><br /><a href="%(site_url)s/admin/websearch/websearchadmin.py/checkexternalcollections?colID=%(colID)s&amp;icl=%(diff)s&amp;update=del&amp;ln=%(ln)s">
                         Click here</a> to force remove the extra collections from your database (warning: use with caution!). If the problem persists please check your configuration manually.""" % {
                "number_db" : number_db,
                "number_file" : number_file,
                "diff" : external_collections_diff,
                "site_url" : CFG_SITE_URL,
                "colID" : colID,
                "ln" : ln}
        else:
            output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
                         &nbsp;&nbsp;&nbsp;- database table \"externalcollection\" has %(number_db)s collections<br />
                         &nbsp;&nbsp;&nbsp;- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
                         <br /><br /><span class="warning">The external collections do not match.</span>
                         <br />To fix the problem please check your configuration manually.""" % {
                "number_db" : number_db,
                "number_file" : number_file}
    else:
        output += """<br /><span class="warning">There is an inconsistency:</span><br /><br />
                     &nbsp;&nbsp;&nbsp;- database table \"externalcollection\" has %(number_db)s collections<br />
                     &nbsp;&nbsp;&nbsp;- configuration file \"websearch_external_collections_config.py\" has %(number_file)s collections
                     <br /><br /><span class="warning">The number of external collections is the same but the collections do not match.</span>
                     <br />To fix the problem please check your configuration manually.""" % {
            "number_db" : number_db,
            "number_file" : number_file}
    body = [output]
    return addadminbox(subtitle, body)
def col_has_son(colID, rtype='r'):
    """Return True if the collection has at least one son of the given type."""
    rows = run_sql("SELECT id_son FROM collection_collection WHERE id_dad=%s and type=%s LIMIT 1", (colID, rtype))
    return rows != ()
def get_col_tree(colID, rtype=''):
    """Returns a presentation of the tree as a list. TODO: Add loop detection
    colID - startpoint for the tree
    rtype - get regular or virtual part of the tree"""
    try:
        colID = int(colID)
        stack = [colID]
        ssize = 0
        # Each node is a tuple (id_son, up, down, id_dad, type); the root
        # is entered with itself as its own dad and type 'r'.
        tree = [(colID, 0, 0, colID, 'r')]
        while len(stack) > 0:
            ccolID = stack.pop()
            # Only the root level is filtered by rtype; deeper levels pick
            # up all sons regardless of their type.
            if ccolID == colID and rtype:
                res = run_sql("SELECT id_son, score, type FROM collection_collection WHERE id_dad=%s AND type=%s ORDER BY score ASC,id_son", (ccolID, rtype))
            else:
                res = run_sql("SELECT id_son, score, type FROM collection_collection WHERE id_dad=%s ORDER BY score ASC,id_son", (ccolID, ))
            ssize += 1
            ntree = []
            for i in range(0, len(res)):
                id_son = res[i][0]
                score = res[i][1]
                # NOTE(review): this rebinds the `rtype` parameter on every
                # row; harmless today because the root (the only place
                # rtype is used as a filter) is always processed first,
                # but fragile.
                rtype = res[i][2]
                stack.append(id_son)
                # up/down flags mark whether the node can move within its
                # sibling list: last sibling gets up=0, first gets down=0.
                if i == (len(res) - 1):
                    up = 0
                else:
                    up = 1
                if i == 0:
                    down = 0
                else:
                    down = 1
                ntree.insert(0, (id_son, up, down, ccolID, rtype))
            # Splice the children immediately after their father so the
            # list reads in depth-first presentation order.
            tree = tree[0:ssize] + ntree + tree[ssize:len(tree)]
        return tree
    except StandardError, e:
        register_exception()
        return ()
def add_col_dad_son(add_dad, add_son, rtype):
    """Attach a collection as a son of another collection.

    add_dad - id of the parent collection
    add_son - id of the collection to attach
    rtype   - relation type, regular or virtual
    Returns (1, score) on success, (0, error) on failure.
    """
    try:
        # New son goes last: one past the highest existing sibling score.
        scores = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s ORDER BY score ASC", (add_dad, ))
        highscore = 0
        for row in scores:
            highscore = max(highscore, int(row[0]))
        highscore += 1
        run_sql("INSERT INTO collection_collection(id_dad,id_son,score,type) values(%s,%s,%s,%s)",
                (add_dad, add_son, highscore, rtype))
        return (1, highscore)
    except StandardError as e:
        register_exception()
        return (0, e)
def compare_on_val(first, second):
    """Compare two (id, value) tuples on their value field (index 1).

    Returns -1, 0 or 1, matching the builtin cmp(); implemented with the
    (a > b) - (a < b) idiom so it keeps working after a Python 3
    migration, where cmp() no longer exists.
    """
    return (first[1] > second[1]) - (first[1] < second[1])
def get_col_fld(colID=-1, type = '', id_field=''):
    """Fetch field/fieldvalue rows attached to collections, optionally
    filtered by collection id, field id and/or type.
    Returns (id_field, id_fieldvalue, type, score, score_fieldvalue) rows."""
    query = "SELECT id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue, field WHERE id_field=field.id"
    args = []
    if colID > -1:
        query += " AND id_collection=%s"
        args.append(colID)
    if id_field:
        query += " AND id_field=%s"
        args.append(id_field)
    if type:
        query += " AND type=%s"
        args.append(type)
    query += " ORDER BY type, score desc, score_fieldvalue desc"
    return run_sql(query, tuple(args))
def get_col_pbx(colID=-1, ln='', position = ''):
    """Fetch portalboxes attached to collections, optionally filtered by
    collection id, language and/or position.
    Returns (id_portalbox, id_collection, ln, score, position, title, body) rows."""
    query = "SELECT id_portalbox, id_collection, ln, score, position, title, body FROM collection_portalbox, portalbox WHERE id_portalbox = portalbox.id"
    args = []
    if colID > -1:
        query += " AND id_collection=%s"
        args.append(colID)
    if ln:
        query += " AND ln=%s"
        args.append(ln)
    if position:
        query += " AND position=%s"
        args.append(position)
    query += " ORDER BY position, ln, score desc"
    return run_sql(query, tuple(args))
def get_col_fmt(colID=-1):
    """Return the output formats attached to collections.

    colID - restrict to this collection; -1 (or "-1") returns all."""
    if colID in [-1, "-1"]:
        return run_sql("SELECT id_format, id_collection, code, score FROM collection_format, format WHERE id_format = format.id ORDER BY score desc")
    return run_sql("SELECT id_format, id_collection, code, score FROM collection_format, format WHERE id_format = format.id AND id_collection=%s ORDER BY score desc", (colID, ))
def get_col_rnk(colID, ln):
    """Return the rank methods the given collection is attached to,
    as (id, name) pairs; () on error."""
    try:
        attached = dict(run_sql("SELECT id_rnkMETHOD, '' FROM collection_rnkMETHOD WHERE id_collection=%s", (colID, )))
        all_methods = get_def_name('', "rnkMETHOD")
        # Keep only the methods whose id appears in the attachment table.
        return [row for row in all_methods if row[0] in attached]
    except StandardError:
        return ()
def get_pbx():
    """Return every portalbox as (id, title, body), ordered by title then body."""
    return run_sql("SELECT id, title, body FROM portalbox ORDER by title,body")
def get_fld_value(fldvID = ''):
    """Return fieldvalue rows (id, name, value); one row when fldvID is given."""
    if fldvID:
        return run_sql("SELECT id, name, value FROM fieldvalue WHERE id=%s ORDER BY name", (fldvID, ))
    return run_sql("SELECT id, name, value FROM fieldvalue ORDER BY name")
def get_pbx_pos():
    """Map portalbox position codes to their human-readable names."""
    return {
        "rt": "Right Top",
        "lt": "Left Top",
        "te": "Title Epilog",
        "tp": "Title Prolog",
        "ne": "Narrow by coll epilog",
        "np": "Narrow by coll prolog",
    }
def get_sort_nametypes():
    """Map translation-name type codes used for fields to display names."""
    return {
        'soo': 'Sort options',
        'seo': 'Search options',
        'sew': 'Search within',
    }
def get_fmt_nametypes():
    """Return the translation name types available for output formats."""
    return [('ln', 'Long name')]
def get_fld_nametypes():
    """Return the translation name types available for fields."""
    return [('ln', 'Long name')]
def get_col_nametypes():
    """Return the translation name types available for collections."""
    return [('ln', 'Long name')]
def find_last(tree, start_son):
    """Return the index of the previous node in `tree` sharing the father
    of the node at index `start_son`, or None when there is none.

    Tree nodes are (id, up, down, id_dad, type) tuples."""
    father = tree[start_son][3]
    for idx in range(start_son - 1, -1, -1):
        if tree[idx][3] == father:
            return idx
def find_next(tree, start_son):
    """Return the index of the next node in `tree` sharing the father of
    the node at index `start_son`, or None when there is none.

    Tree nodes are (id, up, down, id_dad, type) tuples.
    Bug fix: the original loop condition (start_son < len(tree)) allowed
    the index to be incremented to len(tree) and then used to subscript
    the list, raising IndexError whenever no later sibling exists; the
    bound is now len(tree) - 1 and None is returned in that case,
    mirroring find_last().
    """
    id_dad = tree[start_son][3]
    while start_son < len(tree) - 1:
        start_son += 1
        if tree[start_son][3] == id_dad:
            return start_son
    return None
def remove_col_subcol(id_son, id_dad, type):
    """Remove a collection as a son of another collection in the tree, if collection isn't used elsewhere in the tree, remove all registered sons of the id_son.
    id_son - collection id of son to remove
    id_dad - the id of the dad"""
    try:
        if id_son != id_dad:
            # Normal case: capture the son's subtree first, then cut the
            # single dad->son link.
            tree = get_col_tree(id_son)
            run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s", (id_son, id_dad))
        else:
            # Self-link case: restrict both the tree walk and the deletion
            # to the given relation type.
            tree = get_col_tree(id_son, type)
            run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s and type=%s", (id_son, id_dad, type))
        # If the son is no longer attached anywhere with this type, also
        # drop every dad->child link inside its captured subtree.
        if not run_sql("SELECT id_dad,id_son,type,score from collection_collection WHERE id_son=%s and type=%s", (id_son, type)):
            for (id, up, down, dad, rtype) in tree:
                run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s", (id, dad))
        return (1, "")
    except StandardError, e:
        return (0, e)
def check_col(add_dad, add_son):
    """Check if the collection can be placed as a son of the dad without causing loops.
    add_dad - collection id
    add_son - collection id"""
    try:
        stack = [add_dad]
        # A duplicate dad->son link is refused outright.
        res = run_sql("SELECT id_dad FROM collection_collection WHERE id_dad=%s AND id_son=%s", (add_dad, add_son))
        if res:
            raise StandardError
        # Walk upwards from add_dad through all its ancestors; finding
        # add_son among them means attaching it would create a cycle.
        while len(stack) > 0:
            colID = stack.pop()
            res = run_sql("SELECT id_dad FROM collection_collection WHERE id_son=%s", (colID, ))
            for id in res:
                if int(id[0]) == int(add_son):
                    # raise StandardError # this was the original but it didnt work
                    # NOTE(review): returns a bare falsy int here, unlike
                    # the (status, message) tuples used elsewhere — callers
                    # must test truthiness, not index the result.
                    return(0)
                else:
                    stack.append(id[0])
        return (1, "")
    except StandardError, e:
        return (0, e)
def attach_rnk_col(colID, rnkID):
    """Attach a rank method to a collection.

    colID - id of the collection (collection table)
    rnkID - id of the rank method (rnkMETHOD table)
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("INSERT INTO collection_rnkMETHOD(id_collection, id_rnkMETHOD) values (%s,%s)", (colID, rnkID))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def detach_rnk_col(colID, rnkID):
    """Detach a rank method from a collection.

    colID - id of the collection (collection table)
    rnkID - id of the rank method (rnkMETHOD table)
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("DELETE FROM collection_rnkMETHOD WHERE id_collection=%s AND id_rnkMETHOD=%s", (colID, rnkID))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def switch_col_treescore(col_1, col_2):
    """Swap the tree scores of two sibling collection nodes.

    Each argument is a tree tuple (id, up, down, id_dad, type) as
    produced by get_col_tree().
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        score_1 = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s and id_son=%s", (col_1[3], col_1[0]))
        score_2 = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s and id_son=%s", (col_2[3], col_2[0]))
        run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s and id_son=%s", (score_2[0][0], col_1[3], col_1[0]))
        run_sql("UPDATE collection_collection SET score=%s WHERE id_dad=%s and id_son=%s", (score_1[0][0], col_2[3], col_2[0]))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def move_col_tree(col_from, col_to, move_to_rtype=''):
    """Move a collection within the tree: detach it from its current
    father and attach it as the last son of another collection.

    col_from - tree tuple of the collection to move
    col_to   - tree tuple of the new father
    move_to_rtype - relation type; defaults to col_from's current type
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        # Place the moved node last among its new siblings.
        scores = run_sql("SELECT score FROM collection_collection WHERE id_dad=%s ORDER BY score asc", (col_to[0], ))
        highscore = 0
        for row in scores:
            highscore = max(highscore, int(row[0]))
        highscore += 1
        if not move_to_rtype:
            move_to_rtype = col_from[4]
        run_sql("DELETE FROM collection_collection WHERE id_son=%s and id_dad=%s", (col_from[0], col_from[3]))
        run_sql("INSERT INTO collection_collection(id_dad,id_son,score,type) values(%s,%s,%s,%s)", (col_to[0], col_from[0], highscore, move_to_rtype))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def remove_pbx(colID, pbxID, ln):
    """Detach one language version of a portalbox from a collection.

    colID - the collection the portalbox is attached to
    pbxID - the portalbox to detach
    ln    - the language of the portalbox to detach
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("DELETE FROM collection_portalbox WHERE id_collection=%s AND id_portalbox=%s AND ln=%s", (colID, pbxID, ln))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def remove_fmt(colID, fmtID):
    """Detach an output format from a collection.

    colID - the collection the format is attached to
    fmtID - the format to detach
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("DELETE FROM collection_format WHERE id_collection=%s AND id_format=%s", (colID, fmtID))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def remove_fld(colID, fldID, fldvID=''):
    """Detach a field (optionally one specific fieldvalue) from a collection.

    colID  - the collection the field is attached to
    fldID  - the field to detach
    fldvID - restrict to this fieldvalue id; the string "None" selects
             rows whose id_fieldvalue is NULL
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        query = "DELETE FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s"
        args = [colID, fldID]
        if fldvID:
            if fldvID != "None":
                query += " AND id_fieldvalue=%s"
                args.append(fldvID)
            else:
                query += " AND id_fieldvalue is NULL"
        run_sql(query, tuple(args))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def delete_fldv(fldvID):
    """Delete a fieldvalue and all collection links that reference it.

    fldvID - id of the fieldvalue to purge
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("DELETE FROM collection_field_fieldvalue WHERE id_fieldvalue=%s", (fldvID, ))
        run_sql("DELETE FROM fieldvalue WHERE id=%s", (fldvID, ))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def delete_pbx(pbxID):
    """Delete a portalbox and all collection links that reference it.

    pbxID - id of the portalbox to purge
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("DELETE FROM collection_portalbox WHERE id_portalbox=%s", (pbxID, ))
        run_sql("DELETE FROM portalbox WHERE id=%s", (pbxID, ))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def delete_fmt(fmtID):
    """Delete an output format, its names and its collection links.

    fmtID - id of the format to purge
    Returns (1, "") on success, (0, error) on failure.
    """
    try:
        run_sql("DELETE FROM format WHERE id=%s", (fmtID, ))
        run_sql("DELETE FROM collection_format WHERE id_format=%s", (fmtID, ))
        run_sql("DELETE FROM formatname WHERE id_format=%s", (fmtID, ))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def delete_col(colID):
    """Deletes all data for the given collection
    colID - delete all data in the tables associated with collection and this id"""
    try:
        # purge the collection row plus every table that references it,
        # including both parent (id_dad) and child (id_son) tree links
        for statement in ("DELETE FROM collection WHERE id=%s",
                          "DELETE FROM collectionname WHERE id_collection=%s",
                          "DELETE FROM collection_rnkMETHOD WHERE id_collection=%s",
                          "DELETE FROM collection_collection WHERE id_dad=%s",
                          "DELETE FROM collection_collection WHERE id_son=%s",
                          "DELETE FROM collection_portalbox WHERE id_collection=%s",
                          "DELETE FROM collection_format WHERE id_collection=%s",
                          "DELETE FROM collection_field_fieldvalue WHERE id_collection=%s"):
            run_sql(statement, (colID, ))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def add_fmt(code, name, rtype):
    """Add a new output format. Returns the id of the format.
    code - the code for the format, max 6 chars.
    name - the default name for the default language of the format.
    rtype - the default nametype"""
    try:
        run_sql("INSERT INTO format (code, name) values (%s,%s)", (code, name))
        # read back the id assigned to the new format
        fmtID = run_sql("SELECT id FROM format WHERE code=%s", (code,))
        # register the default-language name for the format
        run_sql("INSERT INTO formatname(id_format, type, ln, value) VALUES (%s,%s,%s,%s)",
                (fmtID[0][0], rtype, CFG_SITE_LANG, name))
        return (1, fmtID)
    except StandardError as e:
        register_exception()
        return (0, e)
def update_fldv(fldvID, name, value):
    """Modify existing fieldvalue
    fldvID - id of fieldvalue to modify
    value - the value of the fieldvalue
    name - the name of the fieldvalue.
    Returns (1, "") on success or (0, exception) on failure."""
    try:
        # update both columns in a single statement instead of issuing
        # two separate UPDATEs against the same row
        run_sql("UPDATE fieldvalue SET name=%s, value=%s WHERE id=%s",
                (name, value, fldvID))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def add_fldv(name, value):
    """Add a new fieldvalue, returns id of fieldvalue
    value - the value of the fieldvalue
    name - the name of the fieldvalue."""
    try:
        # reuse an identical existing fieldvalue if there is one
        rows = run_sql("SELECT id FROM fieldvalue WHERE name=%s and value=%s", (name, value))
        if not rows:
            run_sql("INSERT INTO fieldvalue (name, value) values (%s,%s)", (name, value))
            rows = run_sql("SELECT id FROM fieldvalue WHERE name=%s and value=%s", (name, value))
        if not rows:
            raise StandardError
        return (1, rows[0][0])
    except StandardError as e:
        register_exception()
        return (0, e)
def add_pbx(title, body):
    """Create a new portalbox with the given title and body.
    Returns (1, id) of the new portalbox or (0, exception) on failure."""
    try:
        run_sql("INSERT INTO portalbox (title, body) values (%s,%s)", (title, body))
        # read back the id of the row that was just inserted
        rows = run_sql("SELECT id FROM portalbox WHERE title=%s AND body=%s", (title, body))
        if not rows:
            raise StandardError
        return (1, rows[0][0])
    except StandardError as e:
        register_exception()
        return (0, e)
def add_col(colNAME, dbquery=None):
    """Adds a new collection to collection table
    colNAME - the default name for the collection, saved to collection and collectionname
    dbquery - query related to the collection
    Returns (1, colID) on success or (0, exception) on failure."""
    # BTW, sometimes '' are passed instead of None, so change them to None
    if not dbquery:
        dbquery = None
    try:
        # use the first declared collection name type as the default type
        # for the collectionname row below
        rtype = get_col_nametypes()[0][0]
        # id=1 is treated as the root collection: if no row with id=1
        # exists yet, force this collection to take id 1 explicitly,
        # otherwise let AUTO_INCREMENT assign the id
        colID = run_sql("SELECT id FROM collection WHERE id=1")
        if colID:
            res = run_sql("INSERT INTO collection (name,dbquery) VALUES (%s,%s)",
                          (colNAME,dbquery))
        else:
            res = run_sql("INSERT INTO collection (id,name,dbquery) VALUES (1,%s,%s)",
                          (colNAME,dbquery))
        # read back the id assigned to the freshly inserted collection
        colID = run_sql("SELECT id FROM collection WHERE name=%s", (colNAME,))
        res = run_sql("INSERT INTO collectionname(id_collection, type, ln, value) VALUES (%s,%s,%s,%s)",
                      (colID[0][0], rtype, CFG_SITE_LANG, colNAME))
        if colID:
            return (1, colID[0][0])
        else:
            raise StandardError
    except StandardError, e:
        register_exception()
        return (0, e)
def add_col_pbx(colID, pbxID, ln, position, score=''):
    """add a portalbox to the collection.
    colID - the id of the collection involved
    pbxID - the portalbox to add
    ln - which language the portalbox is for
    score - decides which portalbox is the most important
    position - position on page the portalbox should appear."""
    try:
        if score:
            # BUGFIX: the ln placeholder must be a bare %s -- bound
            # parameters are quoted by the SQL layer itself, so writing
            # '%s' double-quotes the language value (compare the
            # identical INSERT in the else-branch below).
            res = run_sql("INSERT INTO collection_portalbox(id_portalbox, id_collection, ln, score, position) values (%s,%s,%s,%s,%s)", (pbxID, colID, ln, score, position))
        else:
            # no score given: place the portalbox on top, i.e. current
            # highest score + 1 (or 1 when none exist yet)
            res = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and ln=%s and position=%s ORDER BY score desc, ln, position", (colID, ln, position))
            if res:
                score = int(res[0][0])
            else:
                score = 0
            res = run_sql("INSERT INTO collection_portalbox(id_portalbox, id_collection, ln, score, position) values (%s,%s,%s,%s,%s)", (pbxID, colID, ln, (score + 1), position))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def add_col_fmt(colID, fmtID, score=''):
    """Add a output format to the collection.
    colID - the id of the collection involved
    fmtID - the id of the format.
    score - the score of the format, decides sorting, if not given, place the format on top"""
    try:
        if score:
            run_sql("INSERT INTO collection_format(id_format, id_collection, score) values (%s,%s,%s)", (fmtID, colID, score))
        else:
            # no score given: place the format on top (highest score + 1)
            rows = run_sql("SELECT score FROM collection_format WHERE id_collection=%s ORDER BY score desc", (colID, ))
            top_score = int(rows[0][0]) if rows else 0
            run_sql("INSERT INTO collection_format(id_format, id_collection, score) values (%s,%s,%s)", (fmtID, colID, (top_score + 1)))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def add_col_fld(colID, fldID, type, fldvID=''):
    """Add a sort/search/field to the collection.
    colID - the id of the collection involved
    fldID - the id of the field.
    fldvID - the id of the fieldvalue.
    type - which type, seo, sew...
    score - the score of the format, decides sorting, if not given, place the format on top
    Returns (1, "") on success, (0, (1, "Already exists")) on duplicates,
    or (0, exception) on database errors."""
    try:
        if fldvID and fldvID not in [-1, "-1"]:
            # a concrete fieldvalue is being attached: first drop any
            # existing "no fieldvalue" (NULL) row for this field/type
            run_sql("DELETE FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type=%s and id_fieldvalue is NULL", (colID, fldID, type))
            # reuse the field's existing score if the field is already
            # attached to this collection/type...
            res = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type=%s ORDER BY score desc", (colID, fldID, type))
            if res:
                score = int(res[0][0])
                res = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s AND id_field=%s and type=%s ORDER BY score_fieldvalue desc", (colID, fldID, type))
            else:
                # ...otherwise take the collection's top score + 1
                res = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and type=%s ORDER BY score desc", (colID, type))
                if res:
                    score = int(res[0][0]) + 1
                else:
                    score = 1
            # refuse to add a duplicate (field, fieldvalue) pair
            res = run_sql("SELECT id_collection,id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue where id_field=%s and id_collection=%s and type=%s and id_fieldvalue=%s", (fldID, colID, type, fldvID))
            if not res:
                # shift the field's existing fieldvalues down one slot and
                # insert the new one at score_fieldvalue=1 (the top)
                run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=score_fieldvalue+1 WHERE id_field=%s AND id_collection=%s and type=%s", (fldID, colID, type))
                res = run_sql("INSERT INTO collection_field_fieldvalue(id_field, id_fieldvalue, id_collection, type, score, score_fieldvalue) values (%s,%s,%s,%s,%s,%s)", (fldID, fldvID, colID, type, score, 1))
            else:
                return (0, (1, "Already exists"))
        else:
            # no fieldvalue given: attach the bare field (NULL fieldvalue)
            # unless it is already present
            res = run_sql("SELECT id_collection,id_field,id_fieldvalue,type,score,score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s AND type=%s and id_field=%s and id_fieldvalue is NULL", (colID, type, fldID))
            if res:
                return (0, (1, "Already exists"))
            else:
                # NOTE(review): this UPDATE has no WHERE clause, so it bumps
                # the score of every row in collection_field_fieldvalue for
                # all collections and types, not just this one -- looks
                # suspicious, confirm this is intended.
                run_sql("UPDATE collection_field_fieldvalue SET score=score+1")
                res = run_sql("INSERT INTO collection_field_fieldvalue(id_field, id_collection, type, score,score_fieldvalue) values (%s,%s,%s,%s, 0)", (fldID, colID, type, 1))
        return (1, "")
    except StandardError, e:
        register_exception()
        return (0, e)
def modify_dbquery(colID, dbquery=None):
    """Modify the dbquery of an collection.
    colID - the id of the collection involved
    dbquery - the new dbquery"""
    # callers sometimes pass '' instead of None; normalise to None (NULL)
    dbquery = dbquery or None
    try:
        run_sql("UPDATE collection SET dbquery=%s WHERE id=%s", (dbquery, colID))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def modify_pbx(colID, pbxID, sel_ln, score='', position='', title='', body=''):
    """Modify a portalbox
    colID - the id of the collection involved
    pbxID - the id of the portalbox that should be modified
    sel_ln - the language of the portalbox that should be modified
    title - the title
    body - the content
    score - if several portalboxes in one position, who should appear on top.
    position - position on page"""
    try:
        # title/body live on the portalbox row itself
        for new_value, statement in (
                (title, "UPDATE portalbox SET title=%s WHERE id=%s"),
                (body, "UPDATE portalbox SET body=%s WHERE id=%s")):
            if new_value:
                run_sql(statement, (new_value, pbxID))
        # score/position live on the collection<->portalbox link row
        for new_value, statement in (
                (score, "UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln=%s"),
                (position, "UPDATE collection_portalbox SET position=%s WHERE id_collection=%s and id_portalbox=%s and ln=%s")):
            if new_value:
                run_sql(statement, (new_value, colID, pbxID, sel_ln))
        return (1, "")
    except Exception as e:
        register_exception()
        return (0, e)
def switch_fld_score(colID, id_1, id_2):
    """Swap the ordering scores of two fields attached to a collection.
    colID - collection both fields are connected to
    id_1/id_2 - ids of the two fields whose scores are exchanged."""
    try:
        score_1 = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s", (colID, id_1))[0][0]
        score_2 = run_sql("SELECT score FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s", (colID, id_2))[0][0]
        if score_1 == score_2:
            # equal scores cannot be swapped meaningfully
            return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem."))
        run_sql("UPDATE collection_field_fieldvalue SET score=%s WHERE id_collection=%s and id_field=%s", (score_2, colID, id_1))
        run_sql("UPDATE collection_field_fieldvalue SET score=%s WHERE id_collection=%s and id_field=%s", (score_1, colID, id_2))
        return (1, "")
    except StandardError as e:
        register_exception()
        return (0, e)
def switch_fld_value_score(colID, id_1, fldvID_1, fldvID_2):
    """Swap the ordering scores of two fieldvalues of the same field.
    colID - collection the field is connected to
    id_1 - the field both fieldvalues belong to
    fldvID_1/fldvID_2 - ids of the two fieldvalues whose scores are exchanged."""
    try:
        score_1 = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (colID, id_1, fldvID_1))[0][0]
        score_2 = run_sql("SELECT score_fieldvalue FROM collection_field_fieldvalue WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (colID, id_1, fldvID_2))[0][0]
        if score_1 == score_2:
            # equal scores cannot be swapped meaningfully
            return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem."))
        run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (score_2, colID, id_1, fldvID_1))
        run_sql("UPDATE collection_field_fieldvalue SET score_fieldvalue=%s WHERE id_collection=%s and id_field=%s and id_fieldvalue=%s", (score_1, colID, id_1, fldvID_2))
        return (1, "")
    except Exception as e:
        register_exception()
        return (0, e)
def switch_pbx_score(colID, id_1, id_2, sel_ln):
    """Swap the ordering scores of two portalboxes of a collection.
    colID - collection both portalboxes are connected to
    id_1/id_2 - ids of the two portalboxes whose scores are exchanged
    sel_ln - language of the portalbox links to modify."""
    try:
        score_1 = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and id_portalbox=%s and ln=%s", (colID, id_1, sel_ln))[0][0]
        score_2 = run_sql("SELECT score FROM collection_portalbox WHERE id_collection=%s and id_portalbox=%s and ln=%s", (colID, id_2, sel_ln))[0][0]
        if score_1 == score_2:
            # equal scores cannot be swapped meaningfully
            return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem."))
        run_sql("UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln=%s", (score_2, colID, id_1, sel_ln))
        run_sql("UPDATE collection_portalbox SET score=%s WHERE id_collection=%s and id_portalbox=%s and ln=%s", (score_1, colID, id_2, sel_ln))
        return (1, "")
    except Exception as e:
        register_exception()
        return (0, e)
def switch_score(colID, id_1, id_2, table):
    """Switch the scores of id_1 and id_2 in the table given by the argument.
    colID - collection the id_1 or id_2 is connected to
    id_1/id_2 - id field from tables like format..portalbox...
    table - name of the table
    Returns (1, "") on success or (0, exception) on failure."""
    try:
        # NOTE(review): `table` is spliced directly into the SQL string
        # (table names cannot be bound parameters); this is safe only as
        # long as callers pass fixed internal table names -- confirm no
        # user-controlled value ever reaches this argument.
        res1 = run_sql("SELECT score FROM collection_%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (colID, id_1))
        res2 = run_sql("SELECT score FROM collection_%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (colID, id_2))
        if res1[0][0] == res2[0][0]:
            # equal scores cannot be swapped meaningfully
            return (0, (1, "Cannot rearrange the selected fields, either rearrange by name or use the mySQL client to fix the problem."))
        res = run_sql("UPDATE collection_%s SET score=%%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (res2[0][0], colID, id_1))
        res = run_sql("UPDATE collection_%s SET score=%%s WHERE id_collection=%%s and id_%s=%%s" % (table, table), (res1[0][0], colID, id_2))
        return (1, "")
    except Exception, e:
        register_exception()
        return (0, e)
def get_detailed_page_tabs(colID=None, recID=None, ln=CFG_SITE_LANG):
    """
    Returns the complete list of tabs to be displayed in the
    detailed record pages.
    Returned structure is a dict with
      - key : last component of the url that leads to detailed record tab: http://www.../CFG_SITE_RECORD/74/key
      - values: a dictionary with the following keys:
            - label: *string* label to be printed as tab (Not localized here)
            - visible: *boolean* if False, tab should not be shown
            - enabled: *boolean* if False, tab should be disabled
            - order: *int* position of the tab in the list of tabs
      - ln: language of the tab labels
    returns dict
    """
    _ = gettext_set_language(ln)
    # all known tabs, initially hidden; visibility is decided per
    # collection below, enabledness per record
    tabs = {'metadata'  : {'label': _('Information'),      'visible': False, 'enabled': True, 'order': 1},
            'references': {'label': _('References'),       'visible': False, 'enabled': True, 'order': 2},
            'citations' : {'label': _('Citations'),        'visible': False, 'enabled': True, 'order': 3},
            'keywords'  : {'label': _('Keywords'),         'visible': False, 'enabled': True, 'order': 4},
            'comments'  : {'label': _('Comments'),         'visible': False, 'enabled': True, 'order': 5},
            'reviews'   : {'label': _('Reviews'),          'visible': False, 'enabled': True, 'order': 6},
            'usage'     : {'label': _('Usage statistics'), 'visible': False, 'enabled': True, 'order': 7},
            'files'     : {'label': _('Files'),            'visible': False, 'enabled': True, 'order': 8},
            'plots'     : {'label': _('Plots'),            'visible': False, 'enabled': True, 'order': 9},
            'holdings'  : {'label': _('Holdings'),         'visible': False, 'enabled': True, 'order': 10},
            'linkbacks' : {'label': _('Linkbacks'),        'visible': False, 'enabled': True, 'order': 11},
            }
    # the collection stores its visible tabs as a ';'-separated list
    res = run_sql("SELECT tabs FROM collectiondetailedrecordpagetabs " + \
                  "WHERE id_collection=%s", (colID, ))
    if len(res) > 0:
        tabs_state = res[0][0].split(';')
        for tab_state in tabs_state:
            if tabs.has_key(tab_state):
                tabs[tab_state]['visible'] = True;
    else:
        # no preference set for this collection.
        # assume all tabs are displayed
        for key in tabs.keys():
            tabs[key]['visible'] = True
    # site-wide switches override collection preferences
    if not CFG_WEBCOMMENT_ALLOW_COMMENTS:
        tabs['comments']['visible'] = False
        tabs['comments']['enabled'] = False
    if not CFG_WEBCOMMENT_ALLOW_REVIEWS:
        tabs['reviews']['visible'] = False
        tabs['reviews']['enabled'] = False
    if recID is not None:
        # Disable references if no references found
        #bfo = BibFormatObject(recID)
        #if bfe_references.format_element(bfo, '', '') == '':
        #    tabs['references']['enabled'] = False
        ## FIXME: the above was commented out because bfe_references
        ## may be too slow. And we do not really need this anyway
        ## because we can disable tabs in WebSearch Admin on a
        ## collection-by-collection basis. If we need this, then we
        ## should probably call bfo.fields('999') here that should be
        ## much faster than calling bfe_references.
        # Disable citations if not citations found
        #if len(get_cited_by(recID)) == 0:
        #    tabs['citations']['enabled'] = False
        ## FIXME: the above was commented out because get_cited_by()
        ## may be too slow. And we do not really need this anyway
        ## because we can disable tags in WebSearch Admin on a
        ## collection-by-collection basis.
        # Disable Files tab if no file found except for Plots:
        disable_files_tab_p = True
        for abibdoc in BibRecDocs(recID).list_bibdocs():
            abibdoc_type = abibdoc.get_type()
            if abibdoc_type == 'Plot':
                continue # ignore attached plots
            else:
                if CFG_INSPIRE_SITE and not \
                   abibdoc_type in ('', 'INSPIRE-PUBLIC', 'Supplementary Material'):
                    # ignore non-empty, non-INSPIRE-PUBLIC, non-suppl doctypes for INSPIRE
                    continue
                # okay, we found at least one non-Plot file:
                disable_files_tab_p = False
                break
        if disable_files_tab_p:
            tabs['files']['enabled'] = False
        #Disable holdings tab if collection != Books
        collection = run_sql("""select name from collection where id=%s""", (colID, ))
        if collection[0][0] != 'Books':
            tabs['holdings']['enabled'] = False
        # Disable Plots tab if no docfile of doctype Plot found
        brd = BibRecDocs(recID)
        if len(brd.list_bibdocs('Plot')) == 0:
            tabs['plots']['enabled'] = False
    if CFG_CERN_SITE:
        from invenio.search_engine import get_collection_reclist
        if recID in get_collection_reclist("Books & Proceedings"):
            tabs['holdings']['visible'] = True
            tabs['holdings']['enabled'] = True
    # the metadata tab is served under the empty URL component
    # (/CFG_SITE_RECORD/<recid>/), so re-key it accordingly
    tabs[''] = tabs['metadata']
    del tabs['metadata']
    return tabs
def get_detailed_page_tabs_counts(recID):
    """
    Returns the number of citations, references and comments/reviews
    that have to be shown on the corresponding tabs in the
    detailed record pages
    @param recID: record id
    @return: dictionary with following keys
                'Citations': number of citations to be shown in the "Citations" tab
                'References': number of references to be shown in the "References" tab
                'Comments': number of comments to be shown in the "Comments" tab
                'Reviews': number of reviews to be shown in the "Reviews" tab
    """
    num_comments = 0 #num of comments
    num_reviews = 0 #num of reviews
    # 'References' starts at -1 to distinguish "not computed" from
    # "computed and zero"
    tabs_counts = {'Citations'   : 0,
                   'References'  : -1,
                   'Discussions' : 0,
                   'Comments'    : 0,
                   'Reviews'     : 0
                   }
    from invenio.search_engine import get_field_tags, get_record
    if CFG_BIBRANK_SHOW_CITATION_LINKS:
        tabs_counts['Citations'] = get_cited_by_count(recID)
    if not CFG_CERN_SITE:#FIXME:should be replaced by something like CFG_SHOW_REFERENCES
        reftag = ""
        reftags = get_field_tags("reference")
        if reftags:
            reftag = reftags[0]
        tmprec = get_record(recID)
        # a usable reference tag needs at least tag (3) + ind1 + ind2 chars
        if reftag and len(reftag) > 4:
            tabs_counts['References'] = len(record_get_field_instances(tmprec, reftag[0:3], reftag[3], reftag[4]))
    # obtain number of comments/reviews
    from invenio.webcommentadminlib import get_nb_reviews, get_nb_comments
    if CFG_WEBCOMMENT_ALLOW_COMMENTS and CFG_WEBSEARCH_SHOW_COMMENT_COUNT:
        num_comments = get_nb_comments(recID, count_deleted=False)
    if CFG_WEBCOMMENT_ALLOW_REVIEWS and CFG_WEBSEARCH_SHOW_REVIEW_COUNT:
        num_reviews = get_nb_reviews(recID, count_deleted=False)
    if num_comments:
        tabs_counts['Comments'] = num_comments
        tabs_counts['Discussions'] += num_comments
    if num_reviews:
        tabs_counts['Reviews'] = num_reviews
        tabs_counts['Discussions'] += num_reviews
    return tabs_counts
diff --git a/invenio/legacy/websearch/webinterface.py b/invenio/legacy/websearch/webinterface.py
index c953b654a..6a58807a8 100644
--- a/invenio/legacy/websearch/webinterface.py
+++ b/invenio/legacy/websearch/webinterface.py
@@ -1,1151 +1,1151 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebSearch URL handler."""
__revision__ = "$Id$"
import cgi
import os
import datetime
import time
import sys
from urllib import quote
from invenio.utils import apache
import threading
# maximum number of collaborating authors etc shown in GUI
MAX_COLLAB_LIST = 10
MAX_KEYWORD_LIST = 10
MAX_VENUE_LIST = 10
# MARC tag constants used when extracting author/venue/keyword data
AUTHOR_TAG = "100__a"
AUTHOR_INST_TAG = "100__u"
COAUTHOR_TAG = "700__a"
COAUTHOR_INST_TAG = "700__u"
VENUE_TAG = "909C4p"
KEYWORD_TAG = "695__a"
FKEYWORD_TAG = "6531_a"
# keywords filtered out of INSPIRE displays when they START a keyword string
CFG_INSPIRE_UNWANTED_KEYWORDS_START = ['talk',
                                       'conference',
                                       'conference proceedings',
                                       'numerical calculations',
                                       'experimental results',
                                       'review',
                                       'bibliography',
                                       'upper limit',
                                       'lower limit',
                                       'tables',
                                       'search for',
                                       'on-shell',
                                       'off-shell',
                                       'formula',
                                       'lectures',
                                       'book',
                                       'thesis']
# keywords filtered out when they appear ANYWHERE inside a keyword string
CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE = ['GeV',
                                        '((']

if sys.hexversion < 0x2040000:
    # Python < 2.4 has no builtin set type; fall back to sets.Set
    # pylint: disable=W0622
    from sets import Set as set
    # pylint: enable=W0622
from invenio.config import \
CFG_SITE_URL, \
CFG_SITE_NAME, \
CFG_CACHEDIR, \
CFG_SITE_LANG, \
CFG_SITE_SECURE_URL, \
CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \
CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \
CFG_WEBSEARCH_RSS_TTL, \
CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS, \
CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \
CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \
CFG_WEBDIR, \
CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \
CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \
CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \
CFG_WEBSEARCH_RSS_I18N_COLLECTIONS, \
CFG_INSPIRE_SITE, \
CFG_WEBSEARCH_WILDCARD_LIMIT, \
CFG_SITE_RECORD
from invenio.dbquery import Error
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.url import redirect_to_url, make_canonical_urlargd, drop_default_urlargd
from invenio.htmlutils import get_mathjax_header
from invenio.htmlutils import nmtoken_from_string
from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \
collect_user_info, logoutUser, isUserSuperAdmin
from invenio.webcomment_webinterface import WebInterfaceCommentsPages
from invenio.weblinkback_webinterface import WebInterfaceRecordLinkbacksPages
from invenio.bibcirculation_webinterface import WebInterfaceHoldingsPages
from invenio.webpage import page, pageheaderonly, create_error_box
from invenio.base.i18n import gettext_set_language
from invenio.search_engine import check_user_can_view_record, \
collection_reclist_cache, \
collection_restricted_p, \
create_similarly_named_authors_link_box, \
get_colID, \
get_coll_i18nname, \
get_most_popular_field_values, \
get_mysql_recid_from_aleph_sysno, \
guess_primary_collection_of_a_record, \
page_end, \
page_start, \
perform_request_cache, \
perform_request_log, \
perform_request_search, \
restricted_collection_cache, \
get_coll_normalised_name, \
EM_REPOSITORY
from invenio.websearch_webcoll import perform_display_collection
from invenio.search_engine_utils import get_fieldvalues, \
get_fieldvalues_alephseq_like
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_config import VIEWRESTRCOLL
from invenio.access_control_mailcookie import mail_cookie_create_authorize_action
from invenio.modules.formatter import format_records
from invenio.modules.formatter.engine import get_output_formats
from invenio.websearch_webcoll import get_collection
from invenio.intbitset import intbitset
from invenio.bibupload import find_record_from_sysno
from invenio.bibrank_citation_searcher import get_cited_by_list
from invenio.bibrank_downloads_indexer import get_download_weight_total
from invenio.search_engine_summarizer import summarize_records
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.bibedit_webinterface import WebInterfaceEditPages
from invenio.bibeditmulti_webinterface import WebInterfaceMultiEditPages
from invenio.bibmerge_webinterface import WebInterfaceMergePages
from invenio.bibdocfile_webinterface import WebInterfaceManageDocFilesPages, WebInterfaceFilesPages
from invenio.search_engine import get_record
from invenio.shellutils import mymkdir
import invenio.template
websearch_templates = invenio.template.load('websearch')

# default URL-argument washing specs provided by the websearch template
search_results_default_urlargd = websearch_templates.search_results_default_urlargd
search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd

# list of known output format codes; fall back to a hard-coded default
# set if the configured formats lack the expected attributes
try:
    output_formats = [output_format['attrs']['code'].lower() for output_format in \
                      get_output_formats(with_attributes=True).values()]
except KeyError:
    output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx']
output_formats.extend(['hm', 't', 'h'])
def wash_search_urlargd(form):
    """
    Create canonical search arguments from those passed via web form.
    """
    argd = wash_urlargd(form, search_results_default_urlargd)
    # legacy parameter name: 'as' is accepted as an alias for 'aas'
    if 'as' in argd:
        argd['aas'] = argd.pop('as')
    if argd.get('aas', CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE) not in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES:
        argd['aas'] = CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE
    # Sometimes, users pass ot=245,700 instead of
    # ot=245&ot=700. Normalize that.
    split_ots = []
    for tags in argd['ot']:
        split_ots.extend(tags.split(','))
    argd['ot'] = split_ots
    # We can either get the mode of function as
    # action=<browse|search>, or by setting action_browse or
    # action_search.
    if argd['action_browse']:
        argd['action'] = 'browse'
    elif argd['action_search']:
        argd['action'] = 'search'
    elif argd['action'] not in ('browse', 'search'):
        argd['action'] = 'search'
    del argd['action_browse']
    del argd['action_search']
    if argd['em'] != "":
        argd['em'] = argd['em'].split(",")
    return argd
class WebInterfaceUnAPIPages(WebInterfaceDirectory):
    """ Handle /unapi set of pages."""
    _exports = ['']

    def __call__(self, req, form):
        # unAPI protocol endpoint: without arguments list available
        # formats, with id+format redirect to the record export URL
        argd = wash_urlargd(form, {
            'id' : (int, 0),
            'format' : (str, '')})

        # map of visible output format codes -> their content types
        formats_dict = get_output_formats(True)
        formats = {}
        for format in formats_dict.values():
            if format['attrs']['visibility']:
                formats[format['attrs']['code'].lower()] = format['attrs']['content_type']
        del formats_dict
        if argd['id'] and argd['format']:
            ## Translate back common format names
            format = {
                'nlm' : 'xn',
                'marcxml' : 'xm',
                'dc' : 'xd',
                'endnote' : 'xe',
                'mods' : 'xo'
            }.get(argd['format'], argd['format'])
            if format in formats:
                redirect_to_url(req, '%s/%s/%s/export/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, argd['id'], format))
            else:
                # unknown format requested: reply 406 Not Acceptable
                raise apache.SERVER_RETURN, apache.HTTP_NOT_ACCEPTABLE
        elif argd['id']:
            return websearch_templates.tmpl_unapi(formats, identifier=argd['id'])
        else:
            return websearch_templates.tmpl_unapi(formats)
    index = __call__
class WebInterfaceRecordPages(WebInterfaceDirectory):
    """ Handling of a /CFG_SITE_RECORD/<recid> URL fragment """

    _exports = ['', 'files', 'reviews', 'comments', 'usage',
                'references', 'export', 'citations', 'holdings', 'edit',
                'keywords', 'multiedit', 'merge', 'plots', 'linkbacks']

    #_exports.extend(output_formats)

    def __init__(self, recid, tab, format=None):
        # recid: id of the record this URL subtree refers to
        # tab: which detailed-record tab to render by default
        # format: optional output format code (e.g. 'hd', 'xm')
        self.recid = recid
        self.tab = tab
        self.format = format

        # delegate sub-URLs to dedicated handlers; tabs rendered by
        # the generic search machinery point back to self
        self.files = WebInterfaceFilesPages(self.recid)
        self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1)
        self.comments = WebInterfaceCommentsPages(self.recid)
        self.usage = self
        self.references = self
        self.keywords = self
        self.holdings = WebInterfaceHoldingsPages(self.recid)
        self.citations = self
        self.plots = self
        self.export = WebInterfaceRecordExport(self.recid, self.format)
        self.edit = WebInterfaceEditPages(self.recid)
        self.merge = WebInterfaceMergePages(self.recid)
        self.linkbacks = WebInterfaceRecordLinkbacksPages(self.recid)

        return

    def __call__(self, req, form):
        # render one detailed-record tab via the search engine
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid
        argd['tab'] = self.tab

        if self.format is not None:
            argd['of'] = self.format
        req.argd = argd
        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                                       navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        # cap the records-per-group setting for non-bibedit users
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        if auth_code and user_info['email'] == 'guest':
            # guest lacking access: send through login with a mail cookie
            # that authorizes viewing the record's primary collection
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                     make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text=auth_msg, \
                navmenuid='search')

        from invenio.search_engine import record_exists, get_merged_recid
        # check if the current record has been deleted
        # and has been merged, case in which the deleted record
        # will be redirect to the new one
        record_status = record_exists(argd['recid'])
        merged_recid = get_merged_recid(argd['recid'])
        if record_status == -1 and merged_recid:
            url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s'
            url %= (str(merged_recid), argd['ln'])
            redirect_to_url(req, url)
        elif record_status == -1:
            req.status = apache.HTTP_GONE ## The record is gone!

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        elif out == []:
            return str(out)
        else:
            return out

    # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/
    index = __call__
class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory):
    """ Handling of a /record-restricted/<recid> URL fragment """

    _exports = ['', 'files', 'reviews', 'comments', 'usage',
                'references', 'export', 'citations', 'holdings', 'edit',
                'keywords', 'multiedit', 'merge', 'plots', 'linkbacks']

    #_exports.extend(output_formats)

    def __init__(self, recid, tab, format=None):
        # recid: id of the record this URL subtree refers to
        # tab: which detailed-record tab to render by default
        # format: optional output format code (e.g. 'hd', 'xm')
        self.recid = recid
        self.tab = tab
        self.format = format

        # delegate sub-URLs to dedicated handlers; tabs rendered by
        # the generic search machinery point back to self
        self.files = WebInterfaceFilesPages(self.recid)
        self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1)
        self.comments = WebInterfaceCommentsPages(self.recid)
        self.usage = self
        self.references = self
        self.keywords = self
        self.holdings = WebInterfaceHoldingsPages(self.recid)
        self.citations = self
        self.plots = self
        self.export = WebInterfaceRecordExport(self.recid, self.format)
        self.edit = WebInterfaceEditPages(self.recid)
        self.merge = WebInterfaceMergePages(self.recid)
        self.linkbacks = WebInterfaceRecordLinkbacksPages(self.recid)

        return

    def __call__(self, req, form):
        # like WebInterfaceRecordPages.__call__, but access is checked
        # against the record's (restricted) primary collection
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid
        if self.format is not None:
            argd['of'] = self.format

        req.argd = argd

        uid = getUid(req)
        user_info = collect_user_info(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                                       navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # cap the records-per-group setting for non-bibedit users
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        record_primary_collection = guess_primary_collection_of_a_record(self.recid)

        if collection_restricted_p(record_primary_collection):
            (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection)
            if auth_code:
                return page_not_authorized(req, "../",
                    text="You are not authorized to view this record.",
                    navmenuid='search')

        # Keep all the arguments, they might be reused in the
        # record page itself to derivate other queries
        req.argd = argd

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        elif out == []:
            return str(out)
        else:
            return out

    # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/
    index = __call__
class WebInterfaceSearchResultsPages(WebInterfaceDirectory):
    """ Handling of the /search URL and its sub-pages. """

    _exports = ['', 'authenticate', 'cache', 'log']

    def __call__(self, req, form):
        """ Perform a search. """
        argd = wash_search_urlargd(form)
        _ = gettext_set_language(argd['ln'])
        if req.method == 'POST':
            # searching is read-only; only GET/HEAD requests are served
            raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED
        uid = getUid(req)
        user_info = collect_user_info(req)
        if uid == -1:
            # uid == -1 denotes a forbidden/broken session
            return page_not_authorized(req, "../",
                text=_("You are not authorized to view this area."),
                navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass
        # Cap the records-per-group for users lacking the 'runbibedit' right:
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS
        # Collect every collection touched by the query, so that access
        # restrictions can be enforced below:
        involved_collections = set()
        involved_collections.update(argd['c'])
        involved_collections.add(argd['cc'])
        # Map legacy 'id'/'idb'/'sysno'/'sysnb' arguments onto recid/recidb:
        if argd['id'] > 0:
            argd['recid'] = argd['id']
        if argd['idb'] > 0:
            argd['recidb'] = argd['idb']
        if argd['sysno']:
            tmp_recid = find_record_from_sysno(argd['sysno'])
            if tmp_recid:
                argd['recid'] = tmp_recid
        if argd['sysnb']:
            tmp_recid = find_record_from_sysno(argd['sysnb'])
            if tmp_recid:
                argd['recidb'] = tmp_recid
        if argd['recid'] > 0:
            if argd['recidb'] > argd['recid']:
                # Hack to check if among the restricted collections
                # at least a record of the range is there and
                # then if the user is not authorized for that
                # collection.
                recids = intbitset(xrange(argd['recid'], argd['recidb']))
                restricted_collection_cache.recreate_cache_if_needed()
                for collname in restricted_collection_cache.cache:
                    (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname)
                    if auth_code and user_info['email'] == 'guest':
                        # guests get a login redirect carrying an
                        # authorization cookie and the original referer
                        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname})
                        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                 make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                        return redirect_to_url(req, target, norobot=True)
                    elif auth_code:
                        # logged-in but unauthorized users get a plain denial
                        return page_not_authorized(req, "../", \
                            text=auth_msg, \
                            navmenuid='search')
            else:
                involved_collections.add(guess_primary_collection_of_a_record(argd['recid']))
        # If any of the collection requires authentication, redirect
        # to the authentication form.
        for coll in involved_collections:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')
        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
            if auth_code != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT
        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0
        # Keep all the arguments, they might be reused in the
        # search_engine itself to derivate other queries
        req.argd = argd
        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        elif out == []:
            return str(out)
        else:
            return out

    def cache(self, req, form):
        """Search cache page."""
        argd = wash_urlargd(form, {'action': (str, 'show')})
        return perform_request_cache(req, action=argd['action'])

    def log(self, req, form):
        """Search log page."""
        argd = wash_urlargd(form, {'date': (str, '')})
        return perform_request_log(req, date=argd['date'])

    def authenticate(self, req, form):
        """Restricted search results pages."""
        argd = wash_search_urlargd(form)
        user_info = collect_user_info(req)
        # Enforce collection restrictions before performing the search:
        for coll in argd['c'] + [argd['cc']]:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    # guests get a login redirect with an authorization cookie
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')
        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
            if auth_code != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT
        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0
        # Keep all the arguments, they might be reused in the
        # search_engine itself to derivate other queries
        req.argd = argd
        uid = getUid(req)
        if uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass
        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        elif out == []:
            return str(out)
        else:
            return out

    index = __call__
class WebInterfaceLegacySearchPages(WebInterfaceDirectory):
    """ Handling of the /search.py URL and its sub-pages. """

    _exports = ['', ('authenticate', 'index')]

    def __call__(self, req, form):
        """Permanently redirect a legacy /search.py request to its
        modern equivalent URL.

        A requested recid sends the user to the specific
        /CFG_SITE_RECORD/... display; otherwise we fall back to the
        generic search form.
        """
        argd = wash_search_urlargd(form)
        if argd['recid'] == -1:
            destination = '/search'
        else:
            destination = '/%s/%d' % (CFG_SITE_RECORD, argd['recid'])
            del argd['recid']
        destination += make_canonical_urlargd(argd, search_results_default_urlargd)
        return redirect_to_url(req, destination, apache.HTTP_MOVED_PERMANENTLY)

    index = __call__
# Parameters for the legacy URLs, of the form /?c=ALEPH.
# Maps each accepted query argument to a (type, default) pair, as expected
# by wash_urlargd(); used by legacy_collection() below.
legacy_collection_default_urlargd = {
    'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),   # legacy spelling of 'aas'
    'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),  # advanced-search flag
    'verbose': (int, 0),
    'c': (str, CFG_SITE_NAME)}                             # collection name
class WebInterfaceSearchInterfacePages(WebInterfaceDirectory):
    """ Handling of collection navigation."""

    _exports = [('index.py', 'legacy_collection'),
                ('', 'legacy_collection'),
                ('search.py', 'legacy_search'),
                'search', 'openurl',
                'opensearchdescription', 'logout_SSO_hook']

    search = WebInterfaceSearchResultsPages()
    legacy_search = WebInterfaceLegacySearchPages()

    def logout_SSO_hook(self, req, form):
        """Script triggered by the display of the centralized SSO logout
        dialog. It logouts the user from Invenio and stream back the
        expected picture."""
        logoutUser(req)
        # Stream the static wsignout.gif image back to the SSO dialog:
        req.content_type = 'image/gif'
        req.encoding = None
        req.filename = 'wsignout.gif'
        req.headers_out["Content-Disposition"] = "inline; filename=wsignout.gif"
        req.set_content_length(os.path.getsize('%s/img/wsignout.gif' % CFG_WEBDIR))
        req.send_http_header()
        req.sendfile('%s/img/wsignout.gif' % CFG_WEBDIR)

    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for
        collections and records).

        @param component: first path component of the URL
        @param path: remaining path components
        @return: (handler, remaining path) tuple, or (None, []) when the
            URL does not match anything we serve
        """
        if component == 'collection':
            # /collection/<name possibly containing slashes>
            c = '/'.join(path)

            def answer(req, form):
                """Accessing collections cached pages."""
                # Accessing collections: this is for accessing the
                # cached page on top of each collection.
                argd = wash_urlargd(form, search_interface_default_urlargd)
                # We simply return the cached page of the collection
                argd['c'] = c
                if not argd['c']:
                    # collection argument not present; display
                    # home collection by default
                    argd['c'] = CFG_SITE_NAME
                # Treat `as' argument specially:
                if argd.has_key('as'):
                    argd['aas'] = argd['as']
                    del argd['as']
                if argd.get('aas', CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE) not in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES:
                    argd['aas'] = CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE
                return display_collection(req, **argd)
            return answer, []
        elif component == CFG_SITE_RECORD and path and path[0] == 'merge':
            return WebInterfaceMergePages(), path[1:]
        elif component == CFG_SITE_RECORD and path and path[0] == 'edit':
            return WebInterfaceEditPages(), path[1:]
        elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit':
            return WebInterfaceMultiEditPages(), path[1:]
        elif component == CFG_SITE_RECORD and path and path[0] in ('managedocfiles', 'managedocfilesasync'):
            return WebInterfaceManageDocFilesPages(), path
        elif component == CFG_SITE_RECORD or component == 'record-restricted':
            # /<CFG_SITE_RECORD>/<recid or sysno>[/<tab>[/<format>]]
            try:
                if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                    # let us try to recognize /<CFG_SITE_RECORD>/<SYSNO> style of URLs:
                    # check for SYSNOs with an embedded slash; needed for [ARXIVINV-15]
                    if len(path) > 1 and get_mysql_recid_from_aleph_sysno(path[0] + "/" + path[1]):
                        path[0] = path[0] + "/" + path[1]
                        del path[1]
                    x = get_mysql_recid_from_aleph_sysno(path[0])
                    if x:
                        recid = x
                    else:
                        recid = int(path[0])
                else:
                    recid = int(path[0])
            except IndexError:
                # display record #1 for URL /CFG_SITE_RECORD without a number
                recid = 1
            except ValueError:
                if path[0] == '':
                    # display record #1 for URL /CFG_SITE_RECORD/ without a number
                    recid = 1
                else:
                    # display page not found for URLs like /CFG_SITE_RECORD/foo
                    return None, []
            from invenio.intbitset import __maxelem__
            if recid <= 0 or recid > __maxelem__:
                # __maxelem__ = 2147483647
                # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 or /CFG_SITE_RECORD/2147483649
                return None, []
            format = None
            tab = ''
            try:
                if path[1] in ['', 'files', 'reviews', 'comments', 'usage',
                               'references', 'citations', 'holdings', 'edit',
                               'keywords', 'multiedit', 'merge', 'plots', 'linkbacks']:
                    tab = path[1]
                elif path[1] == 'export':
                    tab = ''
                    format = path[2]
                #    format = None
                # elif path[1] in output_formats:
                #    tab = ''
                #    format = path[1]
                else:
                    # display page not found for URLs like /CFG_SITE_RECORD/references
                    # for a collection where 'references' tabs is not visible
                    return None, []
            except IndexError:
                # Keep normal url if tabs is not specified
                pass
            #if component == 'record-restricted':
                #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:]
            #else:
            return WebInterfaceRecordPages(recid, tab, format), path[1:]
        elif component == 'sslredirect':
            ## Fallback solution for sslredirect special path that should
            ## be rather implemented as an Apache level redirection
            def redirecter(req, form):
                # redirect to the plain-http URL encoded in the path
                real_url = "http://" + '/'.join(path)
                redirect_to_url(req, real_url)
            return redirecter, []
        return None, []

    def openurl(self, req, form):
        """ OpenURL Handler."""
        argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args)
        ret_url = websearch_templates.tmpl_openurl2invenio(argd)
        if ret_url:
            return redirect_to_url(req, ret_url)
        else:
            # unresolvable OpenURL: fall back to the site home page
            return redirect_to_url(req, CFG_SITE_URL)

    def opensearchdescription(self, req, form):
        """OpenSearch description file"""
        req.content_type = "application/opensearchdescription+xml"
        req.send_http_header()
        argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG),
                                   'verbose': (int, 0) })
        return websearch_templates.tmpl_opensearch_description(ln=argd['ln'])

    def legacy_collection(self, req, form):
        """Collection URL backward compatibility handling."""
        accepted_args = dict(legacy_collection_default_urlargd)
        argd = wash_urlargd(form, accepted_args)
        # Treat `as' argument specially:
        if argd.has_key('as'):
            argd['aas'] = argd['as']
            del argd['as']
        if argd.get('aas', CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE) not in (0, 1):
            argd['aas'] = CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE
        # If we specify no collection, then we don't need to redirect
        # the user, so that accessing <http://yoursite/> returns the
        # default collection.
        if not form.has_key('c'):
            return display_collection(req, **argd)
        # make the collection an element of the path, and keep the
        # other query elements as is. If the collection is CFG_SITE_NAME,
        # however, redirect to the main URL.
        c = argd['c']
        del argd['c']
        if c == CFG_SITE_NAME:
            target = '/'
        else:
            target = '/collection/' + quote(c)
        # Treat `as' argument specially:
        # We are going to redirect, so replace `aas' by `as' visible argument:
        if argd.has_key('aas'):
            argd['as'] = argd['aas']
            del argd['aas']
        target += make_canonical_urlargd(argd, legacy_collection_default_urlargd)
        return redirect_to_url(req, target)
def display_collection(req, c, aas, verbose, ln, em=""):
"""Display search interface page for collection c by looking
in the collection cache."""
_ = gettext_set_language(ln)
req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln, 'em' : em},
search_interface_default_urlargd)
if em != "":
em = em.split(",")
# get user ID:
try:
uid = getUid(req)
user_preferences = {}
if uid == -1:
return page_not_authorized(req, "../",
text="You are not authorized to view this collection",
navmenuid='search')
elif uid > 0:
user_preferences = get_user_preferences(uid)
except Error:
register_exception(req=req, alert_admin=True)
return page(title=_("Internal Error"),
body=create_error_box(req, verbose=verbose, ln=ln),
description="%s - Internal Error" % CFG_SITE_NAME,
keywords="%s, Internal Error" % CFG_SITE_NAME,
language=ln,
req=req,
navmenuid='search')
# start display:
req.content_type = "text/html"
req.send_http_header()
# deduce collection id:
colID = get_colID(get_coll_normalised_name(c))
if type(colID) is not int:
page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>'
page_body = '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
if req.method == 'HEAD':
raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
return page(title=_("Collection %s Not Found") % cgi.escape(c),
body=page_body,
description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
keywords="%s" % CFG_SITE_NAME,
uid=uid,
language=ln,
req=req,
navmenuid='search')
c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \
c_last_updated = perform_display_collection(colID, c, aas, ln, em,
user_preferences.get('websearch_helpbox', 1))
if em == "" or EM_REPOSITORY["body"] in em:
try:
title = get_coll_i18nname(c, ln)
except:
title = ""
else:
title = ""
show_title_p = True
body_css_classes = []
if c == CFG_SITE_NAME:
# Do not display title on home collection
show_title_p = False
body_css_classes.append('home')
if len(collection_reclist_cache.cache.keys()) == 1:
# if there is only one collection defined, do not print its
# title on the page as it would be displayed repetitively.
show_title_p = False
if aas == -1:
show_title_p = False
if CFG_INSPIRE_SITE == 1:
# INSPIRE should never show title, but instead use css to
# style collections
show_title_p = False
body_css_classes.append(nmtoken_from_string(c))
# RSS:
rssurl = CFG_SITE_URL + '/rss'
rssurl_params = []
if c != CFG_SITE_NAME:
rssurl_params.append('cc=' + quote(c))
if ln != CFG_SITE_LANG and \
c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
rssurl_params.append('ln=' + ln)
if rssurl_params:
rssurl += '?' + '&amp;'.join(rssurl_params)
if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
metaheaderadd = get_mathjax_header(req.is_https())
else:
metaheaderadd = ''
return page(title=title,
body=c_body,
navtrail=c_navtrail,
description="%s - %s" % (CFG_SITE_NAME, c),
keywords="%s, %s" % (CFG_SITE_NAME, c),
metaheaderadd=metaheaderadd,
uid=uid,
language=ln,
req=req,
cdspageboxlefttopadd=c_portalbox_lt,
cdspageboxrighttopadd=c_portalbox_rt,
titleprologue=c_portalbox_tp,
titleepilogue=c_portalbox_te,
lastupdated=c_last_updated,
navmenuid='search',
rssurl=rssurl,
body_css_classes=body_css_classes,
show_title_p=show_title_p,
show_header=em == "" or EM_REPOSITORY["header"] in em,
show_footer=em == "" or EM_REPOSITORY["footer"] in em)
class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory):
"""RSS 2.0 feed service pages."""
def __call__(self, req, form):
"""RSS 2.0 feed service."""
# Keep only interesting parameters for the search
default_params = websearch_templates.rss_default_urlargd
# We need to keep 'jrec' and 'rg' here in order to have
# 'multi-page' RSS. These parameters are not kept be default
# as we don't want to consider them when building RSS links
# from search and browse pages.
default_params.update({'jrec':(int, 1),
'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
argd = wash_urlargd(form, default_params)
user_info = collect_user_info(req)
for coll in argd['c'] + [argd['cc']]:
if collection_restricted_p(coll):
(auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
if auth_code and user_info['email'] == 'guest':
cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
return redirect_to_url(req, target, norobot=True)
elif auth_code:
return page_not_authorized(req, "../", \
text=auth_msg, \
navmenuid='search')
# Create a standard filename with these parameters
current_url = websearch_templates.build_rss_url(argd)
cache_filename = current_url.split('/')[-1]
# In the same way as previously, add 'jrec' & 'rg'
req.content_type = "application/rss+xml"
req.send_http_header()
try:
# Try to read from cache
path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
# Check if cache needs refresh
filedesc = open(path, "r")
last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
c_rss = filedesc.read()
filedesc.close()
req.write(c_rss)
return
except Exception, e:
# do it live and cache
previous_url = None
if argd['jrec'] > 1:
prev_jrec = argd['jrec'] - argd['rg']
if prev_jrec < 1:
prev_jrec = 1
previous_url = websearch_templates.build_rss_url(argd,
jrec=prev_jrec)
#check if the user has rights to set a high wildcard limit
#if not, reduce the limit set by user, with the default one
if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
if acc_authorize_action(req, 'runbibedit')[0] != 0:
argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT
req.argd = argd
recIDs = perform_request_search(req, of="id",
c=argd['c'], cc=argd['cc'],
p=argd['p'], f=argd['f'],
p1=argd['p1'], f1=argd['f1'],
m1=argd['m1'], op1=argd['op1'],
p2=argd['p2'], f2=argd['f2'],
m2=argd['m2'], op2=argd['op2'],
p3=argd['p3'], f3=argd['f3'],
m3=argd['m3'], wl=argd['wl'])
nb_found = len(recIDs)
next_url = None
if len(recIDs) >= argd['jrec'] + argd['rg']:
next_url = websearch_templates.build_rss_url(argd,
jrec=(argd['jrec'] + argd['rg']))
first_url = websearch_templates.build_rss_url(argd, jrec=1)
last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1)
recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]
rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
previous_url=previous_url,
next_url=next_url,
first_url=first_url, last_url=last_url,
nb_found=nb_found,
jrec=argd['jrec'], rg=argd['rg'],
cc=argd['cc']) + '\n'
req.write(rss_prologue)
rss_body = format_records(recIDs,
of='xr',
ln=argd['ln'],
user_info=user_info,
record_separator="\n",
req=req, epilogue="\n")
rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
req.write(rss_epilogue)
# update cache
dirname = "%s/rss" % (CFG_CACHEDIR)
mymkdir(dirname)
fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
try:
# Remove the file just in case it already existed
# so that a bit of space is created
os.remove(fullfilename)
except OSError:
pass
# Check if there's enough space to cache the request.
if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
try:
os.umask(022)
f = open(fullfilename, "w")
f.write(rss_prologue + rss_body + rss_epilogue)
f.close()
except IOError, v:
if v[0] == 36:
# URL was too long. Never mind, don't cache
pass
else:
raise repr(v)
index = __call__
class WebInterfaceRecordExport(WebInterfaceDirectory):
    """ Handling of a /<CFG_SITE_RECORD>/<recid>/export/<format> URL fragment """

    _exports = output_formats

    def __init__(self, recid, format=None):
        """
        @param recid: record identifier
        @param format: requested export format code, or None
        """
        self.recid = recid
        self.format = format
        # Every known output format path is served by this very object:
        for output_format in output_formats:
            self.__dict__[output_format] = self
        return

    def __call__(self, req, form):
        """Export the record in the requested format after access checks.

        @param req: request object
        @param form: washed form arguments
        @return: formatted record output, or not-authorized page / redirect
        """
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid
        if self.format is not None:
            argd['of'] = self.format
        req.argd = argd
        uid = getUid(req)
        if uid == -1:
            # uid == -1 denotes a forbidden/broken session
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass
        # Check if the record belongs to a restricted primary
        # collection. If yes, redirect to the authenticated URL.
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)
        # Cap the records-per-group for users lacking the 'runbibedit' right:
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS
        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT
        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0
        if auth_code and user_info['email'] == 'guest':
            # guests get a login redirect carrying an authorization cookie
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                     make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text=auth_msg, \
                navmenuid='search')
        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        elif out == []:
            return str(out)
        else:
            return out

    # Return the same page wether we ask for /CFG_SITE_RECORD/123/export/xm or /CFG_SITE_RECORD/123/export/xm/
    index = __call__
diff --git a/invenio/legacy/websession/webgroup.py b/invenio/legacy/websession/webgroup.py
index a265b0143..b24733f91 100644
--- a/invenio/legacy/websession/webgroup.py
+++ b/invenio/legacy/websession/webgroup.py
@@ -1,932 +1,932 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Group features."""
__revision__ = "$Id$"
import sys
from invenio.config import CFG_SITE_LANG
from invenio.base.i18n import gettext_set_language
from invenio.websession_config import CFG_WEBSESSION_INFO_MESSAGES, \
CFG_WEBSESSION_USERGROUP_STATUS, \
CFG_WEBSESSION_GROUP_JOIN_POLICY, \
InvenioWebSessionError, \
InvenioWebSessionWarning
from invenio.webuser import nickname_valid_p, get_user_info
from invenio.webmessage import perform_request_send
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
import invenio.webgroup_dblayer as db
from invenio.dbquery import IntegrityError
try:
import invenio.template
websession_templates = invenio.template.load('websession')
except ImportError:
pass
if sys.hexversion < 0x2040000:
# pylint: disable=W0622
from sets import Set as set
# pylint: enable=W0622
def perform_request_groups_display(uid, infos=[], warnings = [], \
                                   ln=CFG_SITE_LANG):
    """Build the page listing every group the user belongs to.

    @param uid: user id
    @param infos: informational messages about the last user action
    @param warnings: warnings to display
    @param ln: language
    @return: HTML body including the warnings
    """
    _ = gettext_set_language(ln)
    # Render the three sections (administered / member / external) and
    # assemble them into the overview template.
    admin_html = display_admin_groups(uid, ln)
    member_html = display_member_groups(uid, ln)
    external_html = display_external_groups(uid, ln)
    return websession_templates.tmpl_display_all_groups(
        infos=infos,
        admin_group_html=admin_html,
        member_group_html=member_html,
        external_group_html=external_html,
        warnings=warnings,
        ln=ln)
def display_admin_groups(uid, ln=CFG_SITE_LANG):
    """Render the HTML listing of the groups the user administers.

    @param uid: user id
    @param ln: language
    @return: HTML snippet with the groups the user is admin of
    """
    admin_status = CFG_WEBSESSION_USERGROUP_STATUS["ADMIN"]
    groups = db.get_groups_by_user_status(uid=uid, user_status=admin_status)
    return websession_templates.tmpl_display_admin_groups(groups=groups, ln=ln)
def display_member_groups(uid, ln=CFG_SITE_LANG):
    """Render the HTML listing of the groups the user is a member of.

    @param uid: user id
    @param ln: language
    @return: HTML snippet with the groups the user is member of
    """
    member_status = CFG_WEBSESSION_USERGROUP_STATUS["MEMBER"]
    groups = db.get_groups_by_user_status(uid, user_status=member_status)
    return websession_templates.tmpl_display_member_groups(groups=groups, ln=ln)
def display_external_groups(uid, ln=CFG_SITE_LANG):
    """Render the HTML listing of the user's external groups.

    @param uid: user id
    @param ln: language
    @return: HTML snippet for the external groups, or None when the
        user belongs to none
    """
    groups = db.get_external_groups(uid)
    if not groups:
        return None
    return websession_templates.tmpl_display_external_groups(groups=groups,
                                                             ln=ln)
def perform_request_input_create_group(group_name,
                                       group_description,
                                       join_policy,
                                       warnings=[],
                                       ln=CFG_SITE_LANG):
    """Render the form for creating a new group.

    @param group_name: name of the group entered if the page has been reloaded
    @param group_description: description entered if the page has been reloaded
    @param join_policy: join policy chosen if the page has been reloaded
    @param warnings: warnings
    @param ln: language
    @return: HTML for the group creation page, including the warnings
    """
    return websession_templates.tmpl_display_input_group_info(group_name,
                                                              group_description,
                                                              join_policy,
                                                              act_type="create",
                                                              warnings=warnings,
                                                              ln=ln)
def perform_request_create_group(uid,
group_name,
group_description,
join_policy,
ln=CFG_SITE_LANG):
"""Create new group.
@param group_name: name of the group entered
@param group_description: description of the group entered
@param join_policy: join policy of the group entered
@param ln: language
@return: body with warnings
warning != [] if group_name or join_policy are not valid
or if the name already exists in the database
body="1" if succeed in order to display info on the main page
"""
_ = gettext_set_language(ln)
body = ""
warnings = []
infos = []
if group_name == "":
try:
raise InvenioWebSessionWarning(_('Please enter a group name.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_input_create_group(group_name,
group_description,
join_policy,
warnings=warnings)
elif not group_name_valid_p(group_name):
try:
raise InvenioWebSessionWarning(_('Please enter a valid group name.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_input_create_group(group_name,
group_description,
join_policy,
warnings=warnings)
elif join_policy=="-1":
try:
raise InvenioWebSessionWarning(_('Please choose a group join policy.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_input_create_group(group_name,
group_description,
join_policy,
warnings=warnings)
elif db.group_name_exist(group_name):
try:
raise InvenioWebSessionWarning(_('Group name already exists. Please choose another group name.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_input_create_group(group_name,
group_description,
join_policy,
warnings=warnings)
else:
db.insert_new_group(uid,
group_name,
group_description,
join_policy)
infos.append(CFG_WEBSESSION_INFO_MESSAGES["GROUP_CREATED"])
body = perform_request_groups_display(uid,
infos=infos,
warnings=warnings,
ln=ln)
return body
def perform_request_input_join_group(uid,
                                     group_name,
                                     search,
                                     warnings=[],
                                     ln=CFG_SITE_LANG):
    """Render the page for joining a group.

    @param uid: user id
    @param group_name: name of the group entered if user is looking for a group
    @param search: 1 if a search was performed, else 0
    @param warnings: warnings coming from perform_request_join_group
    @param ln: language
    @return: HTML body including the warnings
    """
    visible_groups = db.get_visible_group_list(uid=uid)
    matching_groups = {}
    if search:
        # Narrow the selection list down to groups matching the query.
        matching_groups = db.get_visible_group_list(uid, group_name)
    return websession_templates.tmpl_display_input_join_group(
        visible_groups.items(),
        group_name,
        matching_groups.items(),
        search,
        warnings=warnings,
        ln=ln)
def perform_request_join_group(uid,
                               grpID,
                               group_name,
                               search,
                               ln=CFG_SITE_LANG):
    """Join group.
    @param grpID: list of the groups the user wants to join,
        only one value must be selected among the two group lists
        (default group list, group list resulting from the search)
    @param group_name: name of the group entered if search on name performed
    @param search=1 if search performed else 0
    @param ln: language
    @return: body with warnings
        warnings != [] if 0 or more than one group is selected
    """
    _ = gettext_set_language(ln)
    body = ""
    warnings = []
    infos = []
    # "-1" is the placeholder entry of the selection widgets; drop it so
    # that only real group choices remain.
    if "-1" in grpID:
        grpID = filter(lambda x: x != '-1', grpID)
    if len(grpID)==1 :
        grpID = int(grpID[0])
        # test if user is already member or pending
        status = db.get_user_status(uid, grpID)
        if status:
            try:
                raise InvenioWebSessionWarning(_('You are already member of the group.'))
            except InvenioWebSessionWarning, exc:
                register_exception(stream='warning')
                warnings.append(exc.message)
            body = perform_request_groups_display(uid,
                                                  infos=infos,
                                                  warnings=warnings,
                                                  ln=ln)
        # insert new user of group
        else:
            group_infos = db.get_group_infos(grpID)
            group_type = group_infos[0][3]
            if group_type == CFG_WEBSESSION_GROUP_JOIN_POLICY["VISIBLEMAIL"]:
                # Moderated group: membership stays PENDING until approved;
                # notify the group admin through an internal message.
                db.insert_new_member(uid,
                                     grpID,
                                     CFG_WEBSESSION_USERGROUP_STATUS["PENDING"])
                admin = db.get_users_by_status(grpID,
                                               CFG_WEBSESSION_USERGROUP_STATUS["ADMIN"])[0][1]
                group_name = group_infos[0][1]
                msg_subjet, msg_body = websession_templates.tmpl_admin_msg(
                    group_name=group_name,
                    grpID=grpID,
                    ln=ln)
                (body, dummy, dummy) = \
                    perform_request_send(uid,
                                         msg_to_user=admin,
                                         msg_to_group="",
                                         msg_subject=msg_subjet,
                                         msg_body=msg_body,
                                         ln=ln)
                infos.append(CFG_WEBSESSION_INFO_MESSAGES["JOIN_REQUEST"])
            elif group_type == CFG_WEBSESSION_GROUP_JOIN_POLICY["VISIBLEOPEN"]:
                # Open group: join immediately as a full MEMBER.
                db.insert_new_member(uid,
                                     grpID,
                                     CFG_WEBSESSION_USERGROUP_STATUS["MEMBER"])
                infos.append(CFG_WEBSESSION_INFO_MESSAGES["JOIN_GROUP"])
            body = perform_request_groups_display(uid,
                                                  infos=infos,
                                                  warnings=warnings,
                                                  ln=ln)
    else:
        # zero or several groups selected: warn and redisplay the join page
        try:
            raise InvenioWebSessionWarning(_('Please select only one group.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_input_join_group(uid,
                                                group_name,
                                                search,
                                                warnings,
                                                ln)
    return body
def perform_request_input_leave_group(uid,
                                      warnings=None,
                                      ln=CFG_SITE_LANG):
    """Return html for the leave-group form.
    @param uid: user ID
    @param warnings: warnings != [] if no group was selected
    @param ln: language
    @return: body with warnings
    """
    # Bug fix: mutable default argument ([]) replaced by a None sentinel.
    if warnings is None:
        warnings = []
    records = db.get_groups_by_user_status(uid=uid,
        user_status=CFG_WEBSESSION_USERGROUP_STATUS["MEMBER"])
    # Keep only (group id, group name) pairs; previously done via map()
    # used for its append side effect, which is non-idiomatic.
    groups = [(record[0], record[1]) for record in records]
    body = websession_templates.tmpl_display_input_leave_group(groups,
                                                               warnings=warnings,
                                                               ln=ln)
    return body
def perform_request_leave_group(uid, grpID, confirmed=0, ln=CFG_SITE_LANG):
"""Leave group.
@param uid: user ID
@param grpID: ID of the group the user wants to leave
@param warnings: warnings != [] if 0 group is selected
@param confirmed: a confirmed page is first displayed
@param ln: language
@return: body with warnings
"""
_ = gettext_set_language(ln)
body = ""
warnings = []
infos = []
if not grpID == -1:
if confirmed:
db.leave_group(grpID, uid)
infos.append(CFG_WEBSESSION_INFO_MESSAGES["LEAVE_GROUP"])
body = perform_request_groups_display(uid,
infos=infos,
warnings=warnings,
ln=ln)
else:
body = websession_templates.tmpl_confirm_leave(uid, grpID, ln)
else:
try:
raise InvenioWebSessionWarning(_('Please select one group.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_input_leave_group(uid,
warnings= warnings,
ln=ln)
return body
def perform_request_edit_group(uid,
grpID,
warnings=[],
ln=CFG_SITE_LANG):
"""Return html for group editing.
@param uid: user ID
@param grpID: ID of the group
@param warnings: warnings
@param ln: language
@return: body with warnings
"""
body = ""
_ = gettext_set_language(ln)
user_status = db.get_user_status(uid, grpID)
if not len(user_status):
try:
raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
elif user_status[0][0] != CFG_WEBSESSION_USERGROUP_STATUS['ADMIN']:
try:
raise InvenioWebSessionError(_('Sorry, you do not have sufficient rights on this group.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
group_infos = db.get_group_infos(grpID)[0]
if not len(group_infos):
try:
raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
body = websession_templates.tmpl_display_input_group_info(
group_name=group_infos[1],
group_description=group_infos[2],
join_policy=group_infos[3],
act_type="update",
grpID=grpID,
warnings=warnings,
ln=ln)
return body
def perform_request_update_group(uid, grpID, group_name, group_description,
join_policy, ln=CFG_SITE_LANG):
"""Update group datas in database.
@param uid: user ID
@param grpID: ID of the group
@param group_name: name of the group
@param group_description: description of the group
@param join_policy: join policy of the group
@param ln: language
@return: body with warnings
"""
body = ''
warnings = []
infos = []
_ = gettext_set_language(ln)
group_name_available = db.group_name_exist(group_name)
if group_name == "":
try:
raise InvenioWebSessionWarning(_('Please enter a group name.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_edit_group(uid,
grpID,
warnings=warnings,
ln=ln)
elif not group_name_valid_p(group_name):
try:
raise InvenioWebSessionWarning(_('Please enter a valid group name.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_edit_group(uid,
grpID,
warnings=warnings,
ln=ln)
elif join_policy == "-1":
try:
raise InvenioWebSessionWarning(_('Please choose a group join policy.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_edit_group(uid,
grpID,
warnings=warnings,
ln=ln)
elif (group_name_available and group_name_available[0][0]!= grpID):
try:
raise InvenioWebSessionWarning(_('Group name already exists. Please choose another group name.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_edit_group(uid,
grpID,
warnings=warnings,
ln=ln)
else:
grpID = db.update_group_infos(grpID,
group_name,
group_description,
join_policy)
infos.append(CFG_WEBSESSION_INFO_MESSAGES["GROUP_UPDATED"])
body = perform_request_groups_display(uid,
infos=infos,
warnings=warnings,
ln=CFG_SITE_LANG)
return body
def perform_request_delete_group(uid, grpID, confirmed=0, ln=CFG_SITE_LANG):
"""First display confirm message(confirmed=0).
then(confirmed=1) delete group and all its members
@param uid: user ID
@param grpID: ID of the group
@param confirmed: =1 if confirmed message has been previously displayed
@param ln: language
@return: body with warnings
"""
body = ""
warnings = []
infos = []
_ = gettext_set_language(ln)
group_infos = db.get_group_infos(grpID)
user_status = db.get_user_status(uid, grpID)
if not group_infos:
try:
raise InvenioWebSessionWarning(_('The group has already been deleted.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_groups_display(uid,
infos=infos,
warnings=warnings,
ln=CFG_SITE_LANG)
else:
if not len(user_status):
try:
raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
elif confirmed:
group_infos = db.get_group_infos(grpID)
group_name = group_infos[0][1]
msg_subjet, msg_body = websession_templates.tmpl_delete_msg(
group_name=group_name, ln=ln)
(body, dummy, dummy) = perform_request_send(
uid,
msg_to_user="",
msg_to_group=group_name,
msg_subject=msg_subjet,
msg_body=msg_body,
ln=ln)
db.delete_group_and_members(grpID)
infos.append(CFG_WEBSESSION_INFO_MESSAGES["GROUP_DELETED"])
body = perform_request_groups_display(uid,
infos=infos,
warnings=warnings,
ln=CFG_SITE_LANG)
else:
body = websession_templates.tmpl_confirm_delete(grpID, ln)
return body
def perform_request_manage_member(uid,
grpID,
infos=[],
warnings=[],
ln=CFG_SITE_LANG):
"""Return html for managing group's members.
@param uid: user ID
@param grpID: ID of the group
@param info: info about last user action
@param warnings: warnings
@param ln: language
@return: body with warnings
"""
body = ''
_ = gettext_set_language(ln)
user_status = db.get_user_status(uid, grpID)
if not len(user_status):
try:
raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
elif user_status[0][0] != CFG_WEBSESSION_USERGROUP_STATUS['ADMIN']:
try:
raise InvenioWebSessionError(_('Sorry, you do not have sufficient rights on this group.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
group_infos = db.get_group_infos(grpID)
if not len(group_infos):
try:
raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
members = db.get_users_by_status(grpID,
CFG_WEBSESSION_USERGROUP_STATUS["MEMBER"])
pending_members = db.get_users_by_status(grpID,
CFG_WEBSESSION_USERGROUP_STATUS["PENDING"])
body = websession_templates.tmpl_display_manage_member(grpID=grpID,
group_name=group_infos[0][1],
members=members,
pending_members=pending_members,
warnings=warnings,
infos=infos,
ln=ln)
return body
def perform_request_remove_member(uid, grpID, member_id, ln=CFG_SITE_LANG):
"""Remove member from a group.
@param uid: user ID
@param grpID: ID of the group
@param member_id: selected member ID
@param ln: language
@return: body with warnings
"""
body = ''
warnings = []
infos = []
_ = gettext_set_language(ln)
user_status = db.get_user_status(uid, grpID)
if not len(user_status):
try:
raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
except InvenioWebSessionError, exc:
register_exception()
body = websession_templates.tmpl_error(exc.message, ln)
return body
if member_id == -1:
try:
raise InvenioWebSessionWarning(_('Please choose a member if you want to remove him from the group.'))
except InvenioWebSessionWarning, exc:
register_exception(stream='warning')
warnings.append(exc.message)
body = perform_request_manage_member(uid,
grpID,
warnings=warnings,
ln=ln)
else:
db.delete_member(grpID, member_id)
infos.append(CFG_WEBSESSION_INFO_MESSAGES["MEMBER_DELETED"])
body = perform_request_manage_member(uid,
grpID,
infos=infos,
warnings=warnings,
ln=ln)
return body
def perform_request_add_member(uid, grpID, user_id, ln=CFG_SITE_LANG):
    """Add a waiting member to a group.
    @param uid: user ID of the acting group administrator
    @param grpID: ID of the group
    @param user_id: selected pending member ID, -1 when nothing was selected
    @param ln: language
    @return: body with warnings
    """
    body = ''
    warnings = []
    infos = []
    _ = gettext_set_language(ln)
    user_status = db.get_user_status(uid, grpID)
    if not len(user_status):
        # Acting user has no status in this group: database problem.
        try:
            raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
        except InvenioWebSessionError, exc:
            register_exception()
            body = websession_templates.tmpl_error(exc.message, ln)
        return body
    if user_id == -1:
        # Nothing selected: warn and redisplay the management page.
        try:
            raise InvenioWebSessionWarning(_('Please choose a user from the list if you want him to be added to the group.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_manage_member(uid,
                                             grpID,
                                             warnings=warnings,
                                             ln=ln)
    else :
        # test if user is already member or pending
        status = db.get_user_status(user_id, grpID)
        if status and status[0][0] == 'M':
            # 'M' = already a full member; nothing to promote.
            try:
                raise InvenioWebSessionWarning(_('The user is already member of the group.'))
            except InvenioWebSessionWarning, exc:
                register_exception(stream='warning')
                warnings.append(exc.message)
            body = perform_request_manage_member(uid,
                                                 grpID,
                                                 infos=infos,
                                                 warnings=warnings,
                                                 ln=ln)
        else:
            # Promote the pending request to full membership and notify
            # the user by internal message that he was accepted.
            db.add_pending_member(grpID,
                                  user_id,
                                  CFG_WEBSESSION_USERGROUP_STATUS["MEMBER"])
            infos.append(CFG_WEBSESSION_INFO_MESSAGES["MEMBER_ADDED"])
            group_infos = db.get_group_infos(grpID)
            group_name = group_infos[0][1]
            user = get_user_info(user_id, ln)[2]
            msg_subjet, msg_body = websession_templates.tmpl_member_msg(
                group_name=group_name, accepted=1, ln=ln)
            (body, dummy, dummy) = perform_request_send(
                uid, msg_to_user=user, msg_to_group="", msg_subject=msg_subjet,
                msg_body=msg_body, ln=ln)
            # NOTE(review): the body returned by perform_request_send is
            # discarded here; the member-management page is shown instead.
            body = perform_request_manage_member(uid,
                                                 grpID,
                                                 infos=infos,
                                                 warnings=warnings,
                                                 ln=ln)
    return body
def perform_request_reject_member(uid,
                                  grpID,
                                  user_id,
                                  ln=CFG_SITE_LANG):
    """Reject a waiting member and delete him from the pending list.
    @param uid: user ID of the acting group administrator
    @param grpID: ID of the group
    @param user_id: selected pending member ID, -1 when nothing was selected
    @param ln: language
    @return: body with warnings
    """
    body = ''
    warnings = []
    infos = []
    _ = gettext_set_language(ln)
    user_status = db.get_user_status(uid, grpID)
    if not len(user_status):
        # Acting user has no status in this group: database problem.
        try:
            raise InvenioWebSessionError(_('Sorry, there was an error with the database.'))
        except InvenioWebSessionError, exc:
            register_exception()
            body = websession_templates.tmpl_error(exc.message, ln)
        return body
    if user_id == -1:
        # Nothing selected: warn and redisplay the management page.
        try:
            raise InvenioWebSessionWarning(_('Please choose a user from the list if you want him to be removed from waiting list.'))
        except InvenioWebSessionWarning, exc:
            register_exception(stream='warning')
            warnings.append(exc.message)
        body = perform_request_manage_member(uid,
                                             grpID,
                                             warnings=warnings,
                                             ln=ln)
    else :
        # test if user is already member or pending
        status = db.get_user_status(user_id, grpID)
        if not status:
            # No row left for this user: the request was already handled.
            try:
                raise InvenioWebSessionWarning(_('The user request for joining group has already been rejected.'))
            except InvenioWebSessionWarning, exc:
                register_exception(stream='warning')
                warnings.append(exc.message)
            body = perform_request_manage_member(uid,
                                                 grpID,
                                                 infos=infos,
                                                 warnings=warnings,
                                                 ln=ln)
        else:
            # Drop the pending row and notify the user he was refused.
            db.delete_member(grpID, user_id)
            group_infos = db.get_group_infos(grpID)
            group_name = group_infos[0][1]
            user = get_user_info(user_id, ln)[2]
            msg_subjet, msg_body = websession_templates.tmpl_member_msg(
                group_name=group_name,
                accepted=0,
                ln=ln)
            (body, dummy, dummy) = perform_request_send(
                uid,
                msg_to_user=user,
                msg_to_group="",
                msg_subject=msg_subjet,
                msg_body=msg_body,
                ln=ln)
            infos.append(CFG_WEBSESSION_INFO_MESSAGES["MEMBER_REJECTED"])
            # NOTE(review): the body from perform_request_send is discarded;
            # the member-management page is rendered instead.
            body = perform_request_manage_member(uid,
                                                 grpID,
                                                 infos=infos,
                                                 warnings=warnings,
                                                 ln=ln)
    return body
def account_group(uid, ln=CFG_SITE_LANG):
    """Display group info for the myaccount.py page.
    @param uid: user id (int)
    @param ln: language
    @return: html body
    """
    # Count the groups the user administers and those he is a member of;
    # the total is simply their sum.
    admin_count = db.count_nb_group_user(uid,
        CFG_WEBSESSION_USERGROUP_STATUS["ADMIN"])
    member_count = db.count_nb_group_user(uid,
        CFG_WEBSESSION_USERGROUP_STATUS["MEMBER"])
    return websession_templates.tmpl_group_info(admin_count,
                                                member_count,
                                                admin_count + member_count,
                                                ln=ln)
def get_navtrail(ln=CFG_SITE_LANG, title=""):
    """Get the navigation trail for *title*.
    @param title: title of the page
    @param ln: language
    @return: HTML output
    """
    # Thin delegation to the websession template layer.
    return websession_templates.tmpl_navtrail(ln, title)
def synchronize_external_groups(userid, groups, login_method):
    """Synchronize external groups: add new groups that aren't already
    known, subscribe userid to groups he isn't yet subscribed to, and
    remove subscriptions to groups he no longer belongs to.
    @param userid: the integer representing the user inside the db
    @param groups: a dictionary of group_name : group_description
    @param login_method: a string unique to the type of authentication
        the groups are associated to, to be used inside the db
    """
    groups_already_known = db.get_login_method_groups(userid, login_method)
    # Map group name -> group id for quick lookup below.
    group_dict = {}
    for name, groupid in groups_already_known:
        group_dict[name] = groupid
    groups_already_known_name = set([g[0] for g in groups_already_known])
    groups_name = set(groups.keys())
    nomore_groups = groups_already_known_name - groups_name
    for group in nomore_groups: # delete the user from no more affiliated group
        db.delete_member(group_dict[group], userid)
    potential_new_groups = groups_name - groups_already_known_name
    for group in potential_new_groups:
        groupid = db.get_group_id(group, login_method)
        if groupid: # Adding the user to an already existent group
            db.insert_new_member(userid, groupid[0][0],
                CFG_WEBSESSION_USERGROUP_STATUS['MEMBER'])
        else: # Adding a new group
            try:
                groupid = db.insert_new_group(userid, group, groups[group], \
                    CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEEXTERNAL'], \
                    login_method)
                # NOTE(review): add_pending_member with status MEMBER is
                # used here while insert_new_member is used above --
                # confirm both calls result in the same membership row.
                db.add_pending_member(groupid, userid,
                    CFG_WEBSESSION_USERGROUP_STATUS['MEMBER'])
            except IntegrityError:
                ## The group already exists? Maybe because of concurrency?
                # Concurrent creation race: fall back to joining the group
                # that the other process just created.
                groupid = db.get_group_id(group, login_method)
                if groupid: # Adding the user to an already existent group
                    db.insert_new_member(userid, groupid[0][0],
                        CFG_WEBSESSION_USERGROUP_STATUS['MEMBER'])
def synchronize_groups_with_login_method():
    """For each login_method, if possible, synchronize groups in a bulk fashion
    (i.e. when fetch_all_users_groups_membership is implemented in the
    external_authentication class). Otherwise, for each user that belongs to at
    least one external group for a given login_method, ask, if possible, for
    his group memberships and merge them.
    """
    # Imported locally -- presumably to avoid a circular import at module
    # load time; confirm before hoisting to the top of the file.
    from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION
    for login_method, authorizer in CFG_EXTERNAL_AUTHENTICATION.items():
        if authorizer:
            try:
                # Preferred path: one bulk query for all memberships.
                usersgroups = authorizer.fetch_all_users_groups_membership()
                synchronize_all_external_groups(usersgroups, login_method)
            except (NotImplementedError, NameError):
                # Fallback: the authorizer has no bulk API, so query each
                # known user of this login method individually.
                users = db.get_all_users_with_groups_with_login_method(
                    login_method)
                for email, uid in users.items():
                    try:
                        groups = authorizer.fetch_user_groups_membership(
                            email)
                        synchronize_external_groups(uid, groups, login_method)
                    except (NotImplementedError, NameError):
                        # Per-user API not available either: skip silently.
                        pass
def synchronize_all_external_groups(usersgroups, login_method):
    """Merge all the groups vs users memberships.
    @param usersgroups: is {'mygroup': ('description',
        ['email1', 'email2', ...]), ...}
    @param login_method: string identifying the external authentication
        method the groups belong to
    NOTE(review): the original docstring claimed a True/False return, but
    no value is explicitly returned -- callers should not rely on it.
    """
    db_users = db.get_all_users() # All users of the database {email:uid, ...}
    db_users_set = set(db_users.keys()) # Set of all users set('email1',
                                        # 'email2', ...)
    for key, value in usersgroups.items():
        # cleaning users not in db
        cleaned_user_list = set()
        for username in value[1]:
            # NOTE(review): emails are upper-cased before the lookup, which
            # assumes db.get_all_users() keys are upper case -- confirm.
            username = username.upper()
            if username in db_users_set:
                cleaned_user_list.add(db_users[username])
        if cleaned_user_list:
            # Replace the email list with the resolved set of uids.
            usersgroups[key] = (value[0], cleaned_user_list)
        else: # List of user now is empty
            # Deleting during the loop is safe here only because Python 2
            # dict.items() returns a materialized list, not a view.
            del usersgroups[key] # cleaning not interesting groups
    # now for each group we got a description and a set of uid
    groups_already_known = db.get_all_login_method_groups(login_method)
    # groups in the db {groupname: id}
    groups_already_known_set = set(groups_already_known.keys())
    # set of the groupnames in db
    usersgroups_set = set(usersgroups.keys()) # set of groupnames to be merged
    # deleted groups!
    nomore_groups = groups_already_known_set - usersgroups_set
    for group_name in nomore_groups:
        db.delete_group_and_members(groups_already_known[group_name])
    # new groups!
    new_groups = usersgroups_set - groups_already_known_set
    for group_name in new_groups:
        groupid = db.insert_only_new_group(
            group_name,
            usersgroups[group_name][0], # description
            CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEEXTERNAL'],
            login_method)
        for uid in usersgroups[group_name][1]:
            db.insert_new_member(uid,
                                 groupid,
                                 CFG_WEBSESSION_USERGROUP_STATUS['MEMBER'])
    # changed groups?
    changed_groups = usersgroups_set & groups_already_known_set
    groups_description = db.get_all_groups_description(login_method)
    for group_name in changed_groups:
        users_already_in_group = db.get_users_in_group(
            groups_already_known[group_name])
        users_already_in_group_set = set(users_already_in_group)
        users_in_group_set = usersgroups[group_name][1]
        # no more affiliation
        nomore_users = users_already_in_group_set - users_in_group_set
        for uid in nomore_users:
            db.delete_member(groups_already_known[group_name], uid)
        # new affiliation
        new_users = users_in_group_set - users_already_in_group_set
        for uid in new_users:
            db.insert_new_member(uid,
                                 groups_already_known[group_name],
                                 CFG_WEBSESSION_USERGROUP_STATUS['MEMBER'])
        # check description
        if groups_description[group_name] != usersgroups[group_name][0]:
            db.update_group_infos(groups_already_known[group_name],
                                  group_name,
                                  usersgroups[group_name][0],
                                  CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEEXTERNAL'])
def group_name_valid_p(group_name):
    """Test if the group's name is valid.

    Group names follow the same syntax rules as user nicknames.
    @param group_name: candidate name (string)
    @return: truthy when the name is acceptable
    """
    return nickname_valid_p(group_name)
diff --git a/invenio/legacy/websession/webinterface.py b/invenio/legacy/websession/webinterface.py
index 26fb5dae5..0e358b776 100644
--- a/invenio/legacy/websession/webinterface.py
+++ b/invenio/legacy/websession/webinterface.py
@@ -1,1796 +1,1796 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
from invenio.webstat import register_customevent
"""Invenio ACCOUNT HANDLING"""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
import cgi
from datetime import timedelta
import os
import re
from invenio.config import \
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT, \
CFG_SITE_NAME, \
CFG_SITE_NAME_INTL, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_SITE_SECURE_URL, \
CFG_SITE_URL, \
CFG_CERN_SITE, \
CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS, \
CFG_OPENAIRE_SITE
from invenio import webuser
from invenio.webpage import page
from invenio import webaccount
from invenio import webbasket
from invenio import webalert
from invenio.dbquery import run_sql
from invenio.webmessage import account_new_mail
from invenio.access_control_engine import acc_authorize_action
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.apache import SERVER_RETURN, HTTP_NOT_FOUND
from invenio.utils.url import redirect_to_url, make_canonical_urlargd
from invenio import webgroup
from invenio import webgroup_dblayer
from invenio.base.i18n import gettext_set_language, wash_language
from invenio.ext.email import send_email
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.access_control_mailcookie import mail_cookie_retrieve_kind, \
mail_cookie_check_pw_reset, mail_cookie_delete_cookie, \
mail_cookie_create_pw_reset, mail_cookie_check_role, \
mail_cookie_check_mail_activation, InvenioWebAccessMailCookieError, \
InvenioWebAccessMailCookieDeletedError, mail_cookie_check_authorize_action
from invenio.access_control_config import CFG_WEBACCESS_WARNING_MSGS, \
CFG_EXTERNAL_AUTH_USING_SSO, CFG_EXTERNAL_AUTH_LOGOUT_SSO, \
CFG_EXTERNAL_AUTHENTICATION, CFG_EXTERNAL_AUTH_SSO_REFRESH, \
CFG_OPENID_CONFIGURATIONS, CFG_OAUTH2_CONFIGURATIONS, \
CFG_OAUTH1_CONFIGURATIONS, CFG_OAUTH2_PROVIDERS, CFG_OAUTH1_PROVIDERS, \
CFG_OPENID_PROVIDERS, CFG_OPENID_AUTHENTICATION, \
CFG_OAUTH1_AUTHENTICATION, CFG_OAUTH2_AUTHENTICATION
from invenio.session import get_session
from invenio.modules import apikeys as web_api_key
import invenio.template
websession_templates = invenio.template.load('websession')
bibcatalog_templates = invenio.template.load('bibcatalog')
class WebInterfaceYourAccountPages(WebInterfaceDirectory):
_exports = ['', 'edit', 'change', 'lost', 'display',
'send_email', 'youradminactivities', 'access',
'delete', 'logout', 'login', 'register', 'resetpassword',
'robotlogin', 'robotlogout', 'apikey', 'openid',
'oauth1', 'oauth2']
_force_https = True
def index(self, req, form):
    # Canonical entry point: always redirect to the HTTPS account page.
    redirect_to_url(req, '%s/youraccount/display' % CFG_SITE_SECURE_URL)
def access(self, req, form):
    """Dispatch a mail-cookie link: password reset, role authorization,
    or email-address activation, depending on the cookie kind."""
    args = wash_urlargd(form, {'mailcookie' : (str, '')})
    _ = gettext_set_language(args['ln'])
    title = _("Mail Cookie Service")
    try:
        kind = mail_cookie_retrieve_kind(args['mailcookie'])
        if kind == 'pw_reset':
            # Password reset has its own page; forward the cookie there.
            redirect_to_url(req, '%s/youraccount/resetpassword?k=%s&ln=%s' % (CFG_SITE_SECURE_URL, args['mailcookie'], args['ln']))
        elif kind == 'role':
            uid = webuser.getUid(req)
            try:
                (role_name, expiration) = mail_cookie_check_role(args['mailcookie'], uid)
            except InvenioWebAccessMailCookieDeletedError:
                # Cookie already consumed: inform the user gracefully.
                return page(title=_("Role authorization request"), req=req, body=_("This request for an authorization has already been authorized."), uid=webuser.getUid(req), navmenuid='youraccount', language=args['ln'], secure_page_p=1)
            return page(title=title,
                        body=webaccount.perform_back(
                            _("You have successfully obtained an authorization as %(x_role)s! "
                              "This authorization will last until %(x_expiration)s and until "
                              "you close your browser if you are a guest user.") %
                            {'x_role' : '<strong>%s</strong>' % role_name,
                             'x_expiration' : '<em>%s</em>' % expiration.strftime("%Y-%m-%d %H:%M:%S")},
                            '/youraccount/display?ln=%s' % args['ln'], _('login'), args['ln']),
                        req=req,
                        uid=webuser.getUid(req),
                        language=args['ln'],
                        lastupdated=__lastupdated__,
                        navmenuid='youraccount',
                        secure_page_p=1)
        elif kind == 'mail_activation':
            try:
                email = mail_cookie_check_mail_activation(args['mailcookie'])
                if not email:
                    # NOTE(review): StandardError is not caught by either
                    # handler here, so this propagates to the caller --
                    # confirm this is the intended failure mode.
                    raise StandardError
                webuser.confirm_email(email)
                body = "<p>" + _("You have confirmed the validity of your email"
                                 " address!") + "</p>"
                if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
                    # At this policy level accounts need manual approval.
                    body += "<p>" + _("Please, wait for the administrator to "
                                      "enable your account.") + "</p>"
                else:
                    # Log the user in right away.
                    uid = webuser.update_Uid(req, email)
                    body += "<p>" + _("You can now go to %(x_url_open)syour account page%(x_url_close)s.") % {'x_url_open' : '<a href="/youraccount/display?ln=%s">' % args['ln'], 'x_url_close' : '</a>'} + "</p>"
                return page(title=_("Email address successfully activated"),
                            body=body, req=req, language=args['ln'], uid=webuser.getUid(req), lastupdated=__lastupdated__, navmenuid='youraccount', secure_page_p=1)
            except InvenioWebAccessMailCookieDeletedError, e:
                # The address was already confirmed earlier.
                body = "<p>" + _("You have already confirmed the validity of your email address!") + "</p>"
                if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
                    body += "<p>" + _("Please, wait for the administrator to "
                                      "enable your account.") + "</p>"
                else:
                    body += "<p>" + _("You can now go to %(x_url_open)syour account page%(x_url_close)s.") % {'x_url_open' : '<a href="/youraccount/display?ln=%s">' % args['ln'], 'x_url_close' : '</a>'} + "</p>"
                return page(title=_("Email address successfully activated"),
                            body=body, req=req, language=args['ln'], uid=webuser.getUid(req), lastupdated=__lastupdated__, navmenuid='youraccount', secure_page_p=1)
        # Fall-through for unknown cookie kinds.
        return webuser.page_not_authorized(req, "../youraccount/access",
            text=_("This request for confirmation of an email "
                   "address is not valid or"
                   " is expired."), navmenuid='youraccount')
    except InvenioWebAccessMailCookieError:
        return webuser.page_not_authorized(req, "../youraccount/access",
            text=_("This request for an authorization is not valid or"
                   " is expired."), navmenuid='youraccount')
def resetpassword(self, req, form):
    """Show and process the password-reset form reached via a mail cookie."""
    args = wash_urlargd(form, {
        'k' : (str, ''),
        'reset' : (int, 0),
        'password' : (str, ''),
        'password2' : (str, '')
        })
    _ = gettext_set_language(args['ln'])
    title = _('Reset password')
    reset_key = args['k']
    try:
        email = mail_cookie_check_pw_reset(reset_key)
    except InvenioWebAccessMailCookieDeletedError:
        # A reset cookie is single-use: this one was already consumed.
        return page(title=title, req=req, body=_("This request for resetting a password has already been used."), uid=webuser.getUid(req), navmenuid='youraccount', language=args['ln'], secure_page_p=1)
    except InvenioWebAccessMailCookieError:
        return webuser.page_not_authorized(req, "../youraccount/access",
            text=_("This request for resetting a password is not valid or"
                   " is expired."), navmenuid='youraccount')
    if email is None or CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 3:
        return webuser.page_not_authorized(req, "../youraccount/resetpassword",
            text=_("This request for resetting the password is not valid or"
                   " is expired."), navmenuid='youraccount')
    if not args['reset']:
        # First visit: display the form asking for the new password.
        return page(title=title,
                    body=webaccount.perform_reset_password(args['ln'], email, reset_key),
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
    elif args['password'] != args['password2']:
        # Mismatched confirmation: redisplay the form with a message.
        msg = _('The two provided passwords aren\'t equal.')
        return page(title=title,
                    body=webaccount.perform_reset_password(args['ln'], email, reset_key, msg),
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
    # NOTE(review): the stored value is AES_ENCRYPT(email, password), i.e.
    # the password acts as the encryption key -- this looks like the legacy
    # Invenio password scheme; confirm before changing.
    run_sql('UPDATE user SET password=AES_ENCRYPT(email,%s) WHERE email=%s', (args['password'], email))
    # Invalidate the cookie so the link cannot be reused.
    mail_cookie_delete_cookie(reset_key)
    return page(title=title,
                body=webaccount.perform_back(
                    _("The password was successfully set! "
                      "You can now proceed with the login."),
                    CFG_SITE_SECURE_URL + '/youraccount/login?ln=%s' % args['ln'], _('login'), args['ln']),
                req=req,
                language=args['ln'],
                lastupdated=__lastupdated__,
                navmenuid='youraccount', secure_page_p=1)
def display(self, req, form):
    """Render the "Your Account" overview page for the logged-in user."""
    args = wash_urlargd(form, {})
    uid = webuser.getUid(req)
    # load the right message language
    _ = gettext_set_language(args['ln'])
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/display",
                                           navmenuid='youraccount')
    if webuser.isGuestUser(uid):
        # Guests only get the static information page.
        return page(title=_("Your Account"),
                    body=webaccount.perform_info(req, args['ln']),
                    description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid=uid,
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
    username = webuser.get_nickname_or_email(uid)
    user_info = webuser.collect_user_info(req)
    # Each section is rendered only when the corresponding precached
    # permission flag is truthy, otherwise an empty string is used
    # (old-style `cond and value or ''` idiom).
    bask = user_info['precached_usebaskets'] and webbasket.account_list_baskets(uid, ln=args['ln']) or ''
    aler = user_info['precached_usealerts'] and webalert.account_list_alerts(uid, ln=args['ln']) or ''
    sear = webalert.account_list_searches(uid, ln=args['ln'])
    msgs = user_info['precached_usemessages'] and account_new_mail(uid, ln=args['ln']) or ''
    grps = user_info['precached_usegroups'] and webgroup.account_group(uid, ln=args['ln']) or ''
    appr = user_info['precached_useapprove']
    sbms = user_info['precached_viewsubmissions']
    comments = user_info['precached_sendcomments']
    # Loans section is intentionally left empty here.
    loan = ''
    admn = webaccount.perform_youradminactivities(user_info, args['ln'])
    return page(title=_("Your Account"),
                body=webaccount.perform_display_account(req, username, bask, aler, sear, msgs, loan, grps, sbms, appr, admn, args['ln'], comments),
                description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                uid=uid,
                req=req,
                secure_page_p = 1,
                language=args['ln'],
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def apikey(self, req, form):
    """Create a new web API key, or mark an existing one as removed,
    then redirect back to the settings page (or to ``referer``).

    When ``key_id`` is posted the key is removed; otherwise a new key
    is created with ``key_description``.
    """
    params = wash_urlargd(form, {
        'key_description': (str, None),
        'key_id': (str, None),
        'referer': (str, '')
    })
    user_id = webuser.getUid(req)
    # Load the right message language.
    _ = gettext_set_language(params['ln'])
    if user_id == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/edit",
                                           navmenuid='youraccount')
    if webuser.isGuestUser(user_id):
        return webuser.page_not_authorized(
            req, "../youraccount/edit",
            text=_("This functionality is forbidden to guest users."),
            navmenuid='youraccount')
    if params['key_id']:
        # NOTE(review): no visible check here that the key belongs to the
        # current user -- presumably enforced inside web_api_key; confirm.
        web_api_key.mark_web_api_key_as_removed(params['key_id'])
    else:
        user_id = webuser.getUid(req)
        web_api_key.create_new_web_api_key(user_id, params['key_description'])
    target = params['referer'] or '%s/youraccount/edit?ln=%s' % (
        CFG_SITE_SECURE_URL, params['ln'])
    redirect_to_url(req, target)
def edit(self, req, form):
    """Render the account settings page.

    With ``verbose=9`` a debugging dump of the ``user_info`` dictionary
    is prepended to the page body.
    """
    params = wash_urlargd(form, {"verbose": (int, 0)})
    user_id = webuser.getUid(req)
    lang = params['ln']
    # Load the right message language.
    _ = gettext_set_language(lang)
    if user_id == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/edit",
                                           navmenuid='youraccount')
    if webuser.isGuestUser(user_id):
        return webuser.page_not_authorized(
            req, "../youraccount/edit",
            text=_("This functionality is forbidden to guest users."),
            navmenuid='youraccount')
    debug_dump = ''
    user_info = webuser.collect_user_info(req)
    if params['verbose'] == 9:
        for key in sorted(user_info.keys()):
            debug_dump += "<b>%s</b>:%s<br />" % (key, user_info[key])
    # Check if the user should see bibcatalog user name / passwd in the
    # settings: only those authorized to run BibEdit get the fields.
    can_config_bibcatalog = (acc_authorize_action(user_info, 'runbibedit')[0] == 0)
    site_name = CFG_SITE_NAME_INTL.get(lang, CFG_SITE_NAME)
    return page(title=_("Your Settings"),
                body=debug_dump + webaccount.perform_set(webuser.get_email(user_id),
                                                         lang, can_config_bibcatalog,
                                                         verbose=params['verbose']),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, lang) + _("Your Account") + """</a>""",
                description=_("%s Personalize, Your Settings") % site_name,
                keywords=_("%s, personalize") % site_name,
                uid=user_id,
                req=req,
                secure_page_p=1,
                language=lang,
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def change(self, req, form):
    """Apply the submitted account-settings form.

    Depending on which fields were posted, this may change the login
    method, email/nickname, password, search display preferences, the
    interface language, or the BibCatalog credentials.  Renders a
    confirmation or failure page with a link back to the relevant page.

    Fix: the BibCatalog section used ``len(...) == 0`` on values that
    wash to None when only one of the two fields is submitted, raising
    TypeError; truthiness tests handle both None and ''.
    """
    args = wash_urlargd(form, {
        'nickname': (str, None),
        'email': (str, None),
        'old_password': (str, None),
        'password': (str, None),
        'password2': (str, None),
        'login_method': (str, ""),
        'group_records' : (int, None),
        'latestbox' : (int, None),
        'helpbox' : (int, None),
        'lang' : (str, None),
        'bibcatalog_username' : (str, None),
        'bibcatalog_password' : (str, None),
        })
    ## Wash arguments:
    args['login_method'] = wash_login_method(args['login_method'])
    if args['email']:
        args['email'] = args['email'].lower()
    ## Load the right message language:
    _ = gettext_set_language(args['ln'])
    ## Identify user and load old preferences:
    uid = webuser.getUid(req)
    prefs = webuser.get_user_preferences(uid)
    ## Check rights:
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/change",
                                           navmenuid='youraccount')
    # FIXME: the branching below is far from optimal.  Should be
    # based on the submitted form name ids, to know precisely on
    # which form the user clicked.  Not on the passed values, as
    # is the case now.  The function body is too big and in bad
    # need of refactoring anyway.
    ## Will hold the output messages:
    mess = ''
    ## Would hold link to previous page and title for the link:
    act = None
    linkname = None
    title = None
    ## Change login method if needed:
    if args['login_method'] and CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 4 \
           and args['login_method'] in CFG_EXTERNAL_AUTHENTICATION:
        title = _("Settings edited")
        act = "/youraccount/display?ln=%s" % args['ln']
        linkname = _("Show account")
        if prefs['login_method'] != args['login_method']:
            if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 4:
                mess += '<p>' + _("Unable to change login method.")
            elif not CFG_EXTERNAL_AUTHENTICATION[args['login_method']]:
                # Switching to internal authentication: we drop any external datas
                p_email = webuser.get_email(uid)
                webuser.drop_external_settings(uid)
                webgroup_dblayer.drop_external_groups(uid)
                prefs['login_method'] = args['login_method']
                webuser.set_user_preferences(uid, prefs)
                mess += "<p>" + _("Switched to internal login method.") + " "
                mess += _("Please note that if this is the first time that you are using this account "
                          "with the internal login method then the system has set for you "
                          "a randomly generated password. Please click the "
                          "following button to obtain a password reset request "
                          "link sent to you via email:") + '</p>'
                mess += """<p><form method="post" action="../youraccount/send_email">
                <input type="hidden" name="p_email" value="%s">
                <input class="formbutton" type="submit" value="%s">
                </form></p>""" % (p_email, _("Send Password"))
            else:
                res = run_sql("SELECT email FROM user WHERE id=%s", (uid,))
                if res:
                    email = res[0][0]
                else:
                    email = None
                if not email:
                    mess += '<p>' + _("Unable to switch to external login method %s, because your email address is unknown.") % cgi.escape(args['login_method'])
                else:
                    try:
                        if not CFG_EXTERNAL_AUTHENTICATION[args['login_method']].user_exists(email):
                            mess += '<p>' + _("Unable to switch to external login method %s, because your email address is unknown to the external login system.") % cgi.escape(args['login_method'])
                        else:
                            prefs['login_method'] = args['login_method']
                            webuser.set_user_preferences(uid, prefs)
                            mess += '<p>' + _("Login method successfully selected.")
                    except AttributeError:
                        mess += '<p>' + _("The external login method %s does not support email address based logins. Please contact the site administrators.") % cgi.escape(args['login_method'])
    ## Change email or nickname:
    if args['email'] or args['nickname']:
        uid2 = webuser.emailUnique(args['email'])
        uid_with_the_same_nickname = webuser.nicknameUnique(args['nickname'])
        current_nickname = webuser.get_nickname(uid)
        if current_nickname and args['nickname'] and \
           current_nickname != args['nickname']:
            # User tried to set nickname while one is already
            # defined (policy is that nickname is not to be
            # changed)
            mess += '<p>' + _("Your nickname has not been updated")
        elif (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2 or (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS <= 1 and \
              webuser.email_valid_p(args['email']))) \
             and (args['nickname'] is None or webuser.nickname_valid_p(args['nickname'])) \
             and uid2 != -1 and (uid2 == uid or uid2 == 0) \
             and uid_with_the_same_nickname != -1 and (uid_with_the_same_nickname == uid or uid_with_the_same_nickname == 0):
            if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 3:
                change = webuser.updateDataUser(uid,
                                                args['email'],
                                                args['nickname'])
            else:
                return webuser.page_not_authorized(req, "../youraccount/change",
                                                   navmenuid='youraccount')
            if change:
                mess += '<p>' + _("Settings successfully edited.")
                mess += '<p>' + _("Note that if you have changed your email address, "
                                  "you will have to %(x_url_open)sreset your password%(x_url_close)s anew.") % \
                        {'x_url_open': '<a href="%s">' % (CFG_SITE_SECURE_URL + '/youraccount/lost?ln=%s' % args['ln']),
                         'x_url_close': '</a>'}
            act = "/youraccount/display?ln=%s" % args['ln']
            linkname = _("Show account")
            title = _("Settings edited")
        elif args['nickname'] is not None and not webuser.nickname_valid_p(args['nickname']):
            mess += '<p>' + _("Desired nickname %s is invalid.") % cgi.escape(args['nickname'])
            mess += " " + _("Please try again.")
            act = "/youraccount/edit?ln=%s" % args['ln']
            linkname = _("Edit settings")
            title = _("Editing settings failed")
        elif not webuser.email_valid_p(args['email']):
            mess += '<p>' + _("Supplied email address %s is invalid.") % cgi.escape(args['email'])
            mess += " " + _("Please try again.")
            act = "/youraccount/edit?ln=%s" % args['ln']
            linkname = _("Edit settings")
            title = _("Editing settings failed")
        elif uid2 == -1 or uid2 != uid and not uid2 == 0:
            mess += '<p>' + _("Supplied email address %s already exists in the database.") % cgi.escape(args['email'])
            mess += " " + websession_templates.tmpl_lost_your_password_teaser(args['ln'])
            mess += " " + _("Or please try again.")
            act = "/youraccount/edit?ln=%s" % args['ln']
            linkname = _("Edit settings")
            title = _("Editing settings failed")
        elif uid_with_the_same_nickname == -1 or uid_with_the_same_nickname != uid and not uid_with_the_same_nickname == 0:
            mess += '<p>' + _("Desired nickname %s is already in use.") % cgi.escape(args['nickname'])
            mess += " " + _("Please try again.")
            act = "/youraccount/edit?ln=%s" % args['ln']
            linkname = _("Edit settings")
            title = _("Editing settings failed")
    ## Change passwords:
    if args['old_password'] or args['password'] or args['password2']:
        if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 3:
            mess += '<p>' + _("Users cannot edit passwords on this site.")
        else:
            res = run_sql("SELECT id FROM user "
                          "WHERE AES_ENCRYPT(email,%s)=password AND id=%s",
                          (args['old_password'], uid))
            if res:
                if args['password'] == args['password2']:
                    webuser.updatePasswordUser(uid, args['password'])
                    mess += '<p>' + _("Password successfully edited.")
                    act = "/youraccount/display?ln=%s" % args['ln']
                    linkname = _("Show account")
                    title = _("Password edited")
                else:
                    mess += '<p>' + _("Both passwords must match.")
                    mess += " " + _("Please try again.")
                    act = "/youraccount/edit?ln=%s" % args['ln']
                    linkname = _("Edit settings")
                    title = _("Editing password failed")
            else:
                mess += '<p>' + _("Wrong old password inserted.")
                mess += " " + _("Please try again.")
                act = "/youraccount/edit?ln=%s" % args['ln']
                linkname = _("Edit settings")
                title = _("Editing password failed")
    ## Change search-related settings:
    if args['group_records']:
        prefs = webuser.get_user_preferences(uid)
        prefs['websearch_group_records'] = args['group_records']
        prefs['websearch_latestbox'] = args['latestbox']
        prefs['websearch_helpbox'] = args['helpbox']
        webuser.set_user_preferences(uid, prefs)
        title = _("Settings edited")
        act = "/youraccount/display?ln=%s" % args['ln']
        linkname = _("Show account")
        mess += '<p>' + _("User settings saved correctly.")
    ## Change language-related settings:
    if args['lang']:
        lang = wash_language(args['lang'])
        prefs = webuser.get_user_preferences(uid)
        prefs['language'] = lang
        args['ln'] = lang
        _ = gettext_set_language(lang)
        webuser.set_user_preferences(uid, prefs)
        title = _("Settings edited")
        act = "/youraccount/display?ln=%s" % args['ln']
        linkname = _("Show account")
        mess += '<p>' + _("User settings saved correctly.")
    ## Edit cataloging-related settings:
    if args['bibcatalog_username'] or args['bibcatalog_password']:
        act = "/youraccount/display?ln=%s" % args['ln']
        linkname = _("Show account")
        # Truthiness covers both '' and None: when only one of the two
        # fields is submitted, the other washes to None and len() would
        # raise TypeError.
        if (not args['bibcatalog_username']) or (not args['bibcatalog_password']):
            title = _("Editing bibcatalog authorization failed")
            mess += '<p>' + _("Empty username or password")
        else:
            title = _("Settings edited")
            prefs['bibcatalog_username'] = args['bibcatalog_username']
            prefs['bibcatalog_password'] = args['bibcatalog_password']
            webuser.set_user_preferences(uid, prefs)
            mess += '<p>' + _("User settings saved correctly.")
    ## Fall back to generic failure output when nothing above matched:
    if not mess:
        mess = _("Unable to update settings.")
    if not act:
        act = "/youraccount/edit?ln=%s" % args['ln']
    if not linkname:
        linkname = _("Edit settings")
    if not title:
        title = _("Editing settings failed")
    ## Finally, output the results:
    return page(title=title,
                body=webaccount.perform_back(mess, act, linkname, args['ln']),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                uid=uid,
                req=req,
                secure_page_p = 1,
                language=args['ln'],
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def lost(self, req, form):
    """Render the "Lost your password?" page."""
    params = wash_urlargd(form, {})
    user_id = webuser.getUid(req)
    lang = params['ln']
    # Load the right message language.
    _ = gettext_set_language(lang)
    if user_id == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/lost",
                                           navmenuid='youraccount')
    site_name = CFG_SITE_NAME_INTL.get(lang, CFG_SITE_NAME)
    return page(title=_("Lost your password?"),
                body=webaccount.perform_lost(lang),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, lang) + _("Your Account") + """</a>""",
                description="%s Personalize, Main page" % site_name,
                keywords=_("%s, personalize") % site_name,
                uid=user_id,
                req=req,
                secure_page_p=1,
                language=lang,
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def send_email(self, req, form):
    """Send a password-reset link to the address posted as ``p_email``.

    Refuses when the matching account uses an external authentication
    system, when the address is unknown, or when the mail cannot be
    delivered; each failure renders an explanatory page.

    Fix: the external-authentication error page interpolated the site
    name BEFORE calling ``_()`` (``_("%s, personalize" % ...)``), so the
    keywords string was never found in the message catalog; it now uses
    the translate-then-interpolate pattern used by every sibling call.
    """
    # set all the declared query fields as local variables
    args = wash_urlargd(form, {'p_email': (str, None)})
    uid = webuser.getUid(req)
    # load the right message language
    _ = gettext_set_language(args['ln'])
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/send_email",
                                           navmenuid='youraccount')
    user_prefs = webuser.get_user_preferences(webuser.emailUnique(args['p_email']))
    if user_prefs:
        if user_prefs['login_method'] in CFG_EXTERNAL_AUTHENTICATION and \
           CFG_EXTERNAL_AUTHENTICATION[user_prefs['login_method']] is not None:
            # Externally-authenticated accounts have no local password to reset.
            eMsg = _("Cannot send password reset request since you are using external authentication system.")
            return page(title=_("Your Account"),
                        body=webaccount.perform_emailMessage(eMsg, args['ln']),
                        description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                        keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                        uid=uid, req=req,
                        secure_page_p = 1,
                        language=args['ln'],
                        lastupdated=__lastupdated__,
                        navmenuid='youraccount')
    try:
        # The cookie encodes the email and expires after the configured
        # number of days.
        reset_key = mail_cookie_create_pw_reset(args['p_email'], cookie_timeout=timedelta(days=CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS))
    except InvenioWebAccessMailCookieError:
        reset_key = None
    if reset_key is None:
        eMsg = _("The entered email address does not exist in the database.")
        return page(title=_("Your Account"),
                    body=webaccount.perform_emailMessage(eMsg, args['ln']),
                    description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid=uid, req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
    ip_address = req.remote_host or req.remote_ip
    # NOTE: this resolves to the module-level mail helper `send_email`,
    # not to this method (class attributes are not in method scope).
    if not send_email(CFG_SITE_SUPPORT_EMAIL, args['p_email'], "%s %s"
                      % (_("Password reset request for"),
                         CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME)),
                      websession_templates.tmpl_account_reset_password_email_body(
                          args['p_email'], reset_key, ip_address, args['ln'])):
        eMsg = _("The entered email address is incorrect, please check that it is written correctly (e.g. johndoe@example.com).")
        return page(title=_("Incorrect email address"),
                    body=webaccount.perform_emailMessage(eMsg, args['ln']),
                    description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid=uid,
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
    return page(title=_("Reset password link sent"),
                body=webaccount.perform_emailSent(args['p_email'], args['ln']),
                description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                uid=uid, req=req,
                secure_page_p = 1,
                language=args['ln'],
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def youradminactivities(self, req, form):
    """Render the page listing the administrative activities available
    to the current user."""
    params = wash_urlargd(form, {})
    user_id = webuser.getUid(req)
    user_info = webuser.collect_user_info(req)
    lang = params['ln']
    # Load the right message language.
    _ = gettext_set_language(lang)
    if user_id == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/youradminactivities",
                                           navmenuid='admin')
    site_name = CFG_SITE_NAME_INTL.get(lang, CFG_SITE_NAME)
    return page(title=_("Your Administrative Activities"),
                body=webaccount.perform_youradminactivities(user_info, lang),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, lang) + _("Your Account") + """</a>""",
                description="%s Personalize, Main page" % site_name,
                keywords=_("%s, personalize") % site_name,
                uid=user_id,
                req=req,
                secure_page_p=1,
                language=lang,
                lastupdated=__lastupdated__,
                navmenuid='admin')
def delete(self, req, form):
    """Render the "Delete Account" page."""
    params = wash_urlargd(form, {})
    user_id = webuser.getUid(req)
    lang = params['ln']
    # Load the right message language.
    _ = gettext_set_language(lang)
    if user_id == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/delete",
                                           navmenuid='youraccount')
    site_name = CFG_SITE_NAME_INTL.get(lang, CFG_SITE_NAME)
    return page(title=_("Delete Account"),
                body=webaccount.perform_delete(lang),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, lang) + _("Your Account") + """</a>""",
                description="%s Personalize, Main page" % site_name,
                keywords=_("%s, personalize") % site_name,
                uid=user_id,
                req=req,
                secure_page_p=1,
                language=lang,
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def logout(self, req, form):
    """Terminate the current session and show the logout page.

    Under SSO the browser is instead redirected to the external
    identity provider's logout URL so the SSO session is closed too.
    """
    params = wash_urlargd(form, {})
    user_id = webuser.logoutUser(req)
    lang = params['ln']
    # Load the right message language.
    _ = gettext_set_language(lang)
    if user_id == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return webuser.page_not_authorized(req, "../youraccount/logout",
                                           navmenuid='youraccount')
    if CFG_EXTERNAL_AUTH_USING_SSO:
        return redirect_to_url(req, CFG_EXTERNAL_AUTH_LOGOUT_SSO)
    site_name = CFG_SITE_NAME_INTL.get(lang, CFG_SITE_NAME)
    return page(title=_("Logout"),
                body=webaccount.perform_logout(req, lang),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, lang) + _("Your Account") + """</a>""",
                description="%s Personalize, Main page" % site_name,
                keywords=_("%s, personalize") % site_name,
                uid=user_id,
                req=req,
                secure_page_p=1,
                language=lang,
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def robotlogout(self, req, form):
    """
    Implement logout method for external service providers.

    After dropping the session, redirect either to the OpenAIRE portal
    or to a transparent 1x1 pixel image on this site.
    """
    webuser.logoutUser(req)
    if CFG_OPENAIRE_SITE:
        from invenio.config import CFG_OPENAIRE_PORTAL_URL
        target = CFG_OPENAIRE_PORTAL_URL
    else:
        target = "%s/img/pix.png" % CFG_SITE_SECURE_URL
    redirect_to_url(req, target)
def robotlogin(self, req, form):
    """
    Implement authentication method for external service providers.

    Authenticates the posted credentials via the given robot
    ``login_method``; on success redirects to ``referer`` (or renders
    the account page), on failure renders a login page with the
    warning message selected by ``msgcode``.
    """
    from invenio.external_authentication import InvenioWebAccessExternalAuthError
    args = wash_urlargd(form, {
        'login_method': (str, None),
        'remember_me' : (str, ''),
        'referer': (str, ''),
        'p_un': (str, ''),
        'p_pw': (str, '')
        })
    # sanity checks:
    args['login_method'] = wash_login_method(args['login_method'])
    args['remember_me'] = args['remember_me'] != ''
    # NOTE(review): writing into locals() has no effect in CPython;
    # this line is effectively a no-op.
    locals().update(args)
    if CFG_ACCESS_CONTROL_LEVEL_SITE > 0:
        return webuser.page_not_authorized(req, CFG_SITE_SECURE_URL + "/youraccount/login?ln=%s" % args['ln'],
                                           navmenuid='youraccount')
    uid = webuser.getUid(req)
    # load the right message language
    _ = gettext_set_language(args['ln'])
    try:
        # loginUser returns (identity, username, password, message code).
        (iden, args['p_un'], args['p_pw'], msgcode) = webuser.loginUser(req, args['p_un'], args['p_pw'], args['login_method'])
    except InvenioWebAccessExternalAuthError, err:
        return page("Error", body=str(err), req=req)
    if iden:
        uid = webuser.update_Uid(req, args['p_un'], args['remember_me'])
        uid2 = webuser.getUid(req)
        if uid2 == -1:
            # Session could not be established after a successful
            # authentication: drop it and bail out.
            webuser.logoutUser(req)
            return webuser.page_not_authorized(req, CFG_SITE_SECURE_URL + "/youraccount/login?ln=%s" % args['ln'], uid=uid,
                                               navmenuid='youraccount')
        # login successful!
        if args['referer']:
            if CFG_OPENAIRE_SITE and args['referer'].startswith('https://openaire.cern.ch/deposit'):
                ## HACK for OpenAIRE
                args['referer'] = args['referer'].replace('https://openaire.cern.ch/deposit', 'http://openaire.cern.ch/deposit')
            redirect_to_url(req, args['referer'])
        else:
            return self.display(req, form)
    else:
        # NOTE(review): unlike login(), this assumes msgcode is an int
        # key of CFG_WEBACCESS_WARNING_MSGS; a string msgcode would
        # raise here -- confirm robot methods only return int codes.
        mess = CFG_WEBACCESS_WARNING_MSGS[msgcode] % cgi.escape(args['login_method'])
        if msgcode == 14:
            # Code 14 (wrong credentials): refine the message when the
            # username actually exists.
            if webuser.username_exists_p(args['p_un']):
                mess = CFG_WEBACCESS_WARNING_MSGS[15] % cgi.escape(args['login_method'])
        act = CFG_SITE_SECURE_URL + '/youraccount/login%s' % make_canonical_urlargd({'ln' : args['ln'], 'referer' : args['referer']}, {})
        return page(title=_("Login"),
                    body=webaccount.perform_back(mess, act, _("login"), args['ln']),
                    navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid=uid,
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
def login(self, req, form):
    """Authenticate the user and establish a session.

    Handles three entry modes: normal form login, OpenAIRE portal
    redirection, and SSO (credentials taken from the environment).
    On success redirects to ``referer`` (or renders the account page);
    on failure renders the login page with a warning message.
    """
    args = wash_urlargd(form, {
        'p_un': (str, None),
        'p_pw': (str, None),
        'login_method': (str, None),
        'action': (str, ''),
        'remember_me' : (str, ''),
        'referer': (str, '')})
    if CFG_OPENAIRE_SITE:
        from invenio.config import CFG_OPENAIRE_PORTAL_URL
        if CFG_OPENAIRE_PORTAL_URL:
            # On OpenAIRE installations, delegate login to the portal,
            # passing the eventual return URL base64-encoded.
            from invenio.utils.url import create_url
            from base64 import encodestring
            invenio_loginurl = args['referer'] or '%s/youraccount/display?ln=%s' % (CFG_SITE_SECURE_URL, args['ln'])
            loginurl = create_url(CFG_OPENAIRE_PORTAL_URL, {"option": "com_openaire", "view": "login", "return": encodestring(invenio_loginurl)})
            redirect_to_url(req, loginurl)
    # sanity checks:
    args['login_method'] = wash_login_method(args['login_method'])
    if args['p_un']:
        args['p_un'] = args['p_un'].strip()
    args['remember_me'] = args['remember_me'] != ''
    # NOTE(review): writing into locals() has no effect in CPython;
    # this line is effectively a no-op.
    locals().update(args)
    if CFG_ACCESS_CONTROL_LEVEL_SITE > 0:
        return webuser.page_not_authorized(req, CFG_SITE_SECURE_URL + "/youraccount/login?ln=%s" % args['ln'],
                                           navmenuid='youraccount')
    uid = webuser.getUid(req)
    # If user is already logged in, redirect it to referer or your account
    # page
    if uid > 0:
        redirect_to_url(req, args['referer'] or '%s/youraccount/display?ln=%s' % (CFG_SITE_SECURE_URL, args['ln']))
    # load the right message language
    _ = gettext_set_language(args['ln'])
    if args['action']:
        cookie = args['action']
        try:
            # NOTE(review): the unpacked action/arguments are never used
            # below; this only validates that the cookie parses.
            action, arguments = mail_cookie_check_authorize_action(cookie)
        except InvenioWebAccessMailCookieError:
            pass
    if not CFG_EXTERNAL_AUTH_USING_SSO:
        # Without a username or an explicit login method (and not using
        # an OpenID/OAuth flow), just render the login form.
        if (args['p_un'] is None or not args['login_method']) and (not args['login_method'] in ['openid', 'oauth1', 'oauth2']):
            return page(title=_("Login"),
                        body=webaccount.create_login_page_box(args['referer'], args['ln']),
                        navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                        description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                        keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                        uid=uid,
                        req=req,
                        secure_page_p=1,
                        language=args['ln'],
                        lastupdated=__lastupdated__,
                        navmenuid='youraccount')
        # loginUser returns (identity, username, password, message code).
        (iden, args['p_un'], args['p_pw'], msgcode) = webuser.loginUser(req, args['p_un'], args['p_pw'], args['login_method'])
    else:
        # Fake parameters for p_un & p_pw because SSO takes them from the environment
        (iden, args['p_un'], args['p_pw'], msgcode) = webuser.loginUser(req, '', '', CFG_EXTERNAL_AUTH_USING_SSO)
        args['remember_me'] = False
    if iden:
        uid = webuser.update_Uid(req, args['p_un'], args['remember_me'])
        uid2 = webuser.getUid(req)
        if uid2 == -1:
            # Session could not be established; drop it and bail out.
            webuser.logoutUser(req)
            return webuser.page_not_authorized(req, CFG_SITE_SECURE_URL + "/youraccount/login?ln=%s" % args['ln'], uid=uid,
                                               navmenuid='youraccount')
        # login successful!
        try:
            # Record the login in the usage statistics (best-effort).
            register_customevent("login", [req.remote_host or req.remote_ip, uid, args['p_un']])
        except:
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
        if args['referer']:
            redirect_to_url(req, args['referer'].replace(CFG_SITE_URL, CFG_SITE_SECURE_URL))
        else:
            return self.display(req, form)
    else:
        mess = None
        if isinstance(msgcode, (str, unicode)):
            # if msgcode is string, show it.
            mess = msgcode
        elif msgcode in [21, 22, 23]:
            # These codes carry a complete message with no placeholder.
            mess = CFG_WEBACCESS_WARNING_MSGS[msgcode]
        elif msgcode == 14:
            # Code 14 (wrong credentials): refine the message when the
            # username actually exists.
            if webuser.username_exists_p(args['p_un']):
                mess = CFG_WEBACCESS_WARNING_MSGS[15] % cgi.escape(args['login_method'])
        if not mess:
            mess = CFG_WEBACCESS_WARNING_MSGS[msgcode] % cgi.escape(args['login_method'])
        act = CFG_SITE_SECURE_URL + '/youraccount/login%s' % make_canonical_urlargd({'ln' : args['ln'], 'referer' : args['referer']}, {})
        return page(title=_("Login"),
                    body=webaccount.perform_back(mess, act, _("login"), args['ln']),
                    navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid=uid,
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
def register(self, req, form):
    """Handle account self-registration.

    Without posted nickname/email, renders the registration form.
    Otherwise calls ``webuser.registerUser`` and maps its return code
    (``ruid``) to a result page: 0 = success, -2 = password mismatch
    (set locally), 1 = invalid email, 2 = invalid nickname, 3 = email
    taken, 4 = nickname taken, 5 = self-registration disabled,
    6 = confirmation email could not be sent.
    """
    args = wash_urlargd(form, {
        'p_nickname': (str, None),
        'p_email': (str, None),
        'p_pw': (str, None),
        'p_pw2': (str, None),
        'action': (str, "login"),
        'referer': (str, "")})
    if CFG_ACCESS_CONTROL_LEVEL_SITE > 0:
        return webuser.page_not_authorized(req, "../youraccount/register?ln=%s" % args['ln'],
                                           navmenuid='youraccount')
    uid = webuser.getUid(req)
    # load the right message language
    _ = gettext_set_language(args['ln'])
    # No form data yet: show the registration form.
    if args['p_nickname'] is None or args['p_email'] is None:
        return page(title=_("Register"),
                    body=webaccount.create_register_page_box(args['referer'], args['ln']),
                    navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description=_("%s Personalize, Main page") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid=uid,
                    req=req,
                    secure_page_p = 1,
                    language=args['ln'],
                    lastupdated=__lastupdated__,
                    navmenuid='youraccount')
    mess = ""
    act = ""
    if args['p_pw'] == args['p_pw2']:
        ruid = webuser.registerUser(req, args['p_email'], args['p_pw'],
                                    args['p_nickname'], ln=args['ln'])
    else:
        # Passwords differ: synthesize a local error code.
        ruid = -2
    if ruid == 0:
        mess = _("Your account has been successfully created.")
        title = _("Account created")
        if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT == 1:
            mess += " " + _("In order to confirm its validity, an email message containing an account activation key has been sent to the given email address.")
            mess += " " + _("Please follow instructions presented there in order to complete the account registration process.")
            if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1:
                mess += " " + _("A second email will be sent when the account has been activated and can be used.")
        elif CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT != 1:
            # No activation email needed: log the new user in right away.
            uid = webuser.update_Uid(req, args['p_email'])
            mess += " " + _("You can now access your %(x_url_open)saccount%(x_url_close)s.") %\
                {'x_url_open': '<a href="' + CFG_SITE_SECURE_URL + '/youraccount/display?ln=' + args['ln'] + '">',
                 'x_url_close': '</a>'}
    elif ruid == -2:
        mess = _("Both passwords must match.")
        mess += " " + _("Please try again.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    elif ruid == 1:
        mess = _("Supplied email address %s is invalid.") % cgi.escape(args['p_email'])
        mess += " " + _("Please try again.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    elif ruid == 2:
        mess = _("Desired nickname %s is invalid.") % cgi.escape(args['p_nickname'])
        mess += " " + _("Please try again.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    elif ruid == 3:
        mess = _("Supplied email address %s already exists in the database.") % cgi.escape(args['p_email'])
        mess += " " + websession_templates.tmpl_lost_your_password_teaser(args['ln'])
        mess += " " + _("Or please try again.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    elif ruid == 4:
        mess = _("Desired nickname %s already exists in the database.") % cgi.escape(args['p_nickname'])
        mess += " " + _("Please try again.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    elif ruid == 5:
        mess = _("Users cannot register themselves, only admin can register them.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    elif ruid == 6:
        mess = _("The site is having troubles in sending you an email for confirming your email address.") + _("The error has been logged and will be taken in consideration as soon as possible.")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    else:
        # this should never happen
        mess = _("Internal Error")
        act = "/youraccount/register?ln=%s" % args['ln']
        title = _("Registration failure")
    return page(title=title,
                body=webaccount.perform_back(mess,act, _("register"), args['ln']),
                navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                description=_("%s Personalize, Main page") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                uid=uid,
                req=req,
                secure_page_p = 1,
                language=args['ln'],
                lastupdated=__lastupdated__,
                navmenuid='youraccount')
def openid(self, req, form):
    """
    Constructs the URL of the login page of the OpenID provider and
    either redirects the user agent there or returns an auto-submitting
    HTML form that posts the request to the provider.
    """
    def get_consumer(req):
        """
        Returns a consumer without a memory (no persistent association
        store is passed; only the session id is carried as state).
        """
        return consumer.Consumer({"id": get_session(req)}, None)

    def request_registration_data(request, provider):
        """
        Adds simple registration (sreg) and attribute exchange (ax)
        extensions to the given OpenID request.

        @param request: OpenID request
        @type request: openid.consumer.consumer.AuthRequest

        @param provider: OpenID provider
        @type provider: str
        """
        # We ask the user nickname if the provider accepts sreg request.
        sreg_request = sreg.SRegRequest(required = ['nickname'])
        request.addExtension(sreg_request)
        # If the provider is trusted, we may ask the email of the user, too.
        ax_request = ax.FetchRequest()
        if CFG_OPENID_CONFIGURATIONS[provider].get('trust_email', False):
            ax_request.add(ax.AttrInfo(
                'http://axschema.org/contact/email',
                required = True))
        ax_request.add(ax.AttrInfo(
            'http://axschema.org/namePerson/friendly',
            required = True))
        request.addExtension(ax_request)

    # All arguments must be extracted; every configured provider name is
    # also accepted as a query-string key.
    content = {
        'provider': (str, ''),
        'identifier': (str, ''),
        'referer': (str, '')
    }
    for key in CFG_OPENID_CONFIGURATIONS.keys():
        content[key] = (str, '')
    args = wash_urlargd(form, content)
    # Load the right message language
    _ = gettext_set_language(args['ln'])
    try:
        # Deferred import: python-openid is an optional dependency; the
        # nested helpers above close over these names.
        from openid.consumer import consumer
        from openid.extensions import ax
        from openid.extensions import sreg
    except:
        # Return login page with 'Need to install python-openid' error
        return page(title = _("Login"),
                    body = webaccount.create_login_page_box(
                        '%s/youraccount/login?error=openid-python' % \
                        CFG_SITE_SECURE_URL,
                        args['ln']
                    ),
                    navtrail = """
<a class="navtrail" href="%s/youraccount/display?ln=%s">
""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description = "%s Personalize, Main page" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords = "%s , personalize" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid = 0,
                    req = req,
                    secure_page_p = 1,
                    language = args['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'youraccount')
    # If either provider isn't activated or OpenID authentication is
    # disabled, redirect to login page.
    if not (args['provider'] in CFG_OPENID_PROVIDERS and
            CFG_OPENID_AUTHENTICATION):
        redirect_to_url(req, CFG_SITE_SECURE_URL + "/youraccount/login")
    # Load the right message language
    _ = gettext_set_language(args['ln'])
    # Construct the OpenID identifier url according to given template in the
    # configuration.
    openid_url = CFG_OPENID_CONFIGURATIONS[args['provider']]['identifier'].\
        format(args['identifier'])
    oidconsumer = get_consumer(req)
    try:
        request = oidconsumer.begin(openid_url)
    except consumer.DiscoveryFailure:
        # If the identifier is invalid, then display login form with error
        # message.
        return page(title = _("Login"),
                    body = webaccount.create_login_page_box(
                        '%s/youraccount/login?error=openid-invalid' % \
                        CFG_SITE_SECURE_URL,
                        args['ln']
                    ),
                    navtrail = """
<a class="navtrail" href="%s/youraccount/display?ln=%s">
""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description = "%s Personalize, Main page" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords = "%s , personalize" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid = 0,
                    req = req,
                    secure_page_p = 1,
                    language = args['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'youraccount')
    else:
        trust_root = CFG_SITE_SECURE_URL + "/"
        return_to = CFG_SITE_SECURE_URL + "/youraccount/login?"
        if args['provider'] == 'openid':
            # Look if the identifier is defined: try to map the free-form
            # identifier the user typed onto one of the configured
            # providers, either by substring or by template-derived regexp.
            for key in CFG_OPENID_CONFIGURATIONS.keys():
                if CFG_OPENID_CONFIGURATIONS[key]['identifier'] != '{0}':
                    regexp = re.compile(CFG_OPENID_CONFIGURATIONS[key]
                                        ['identifier'].
                                        format("\w+"), re.IGNORECASE)
                    if openid_url in CFG_OPENID_CONFIGURATIONS[key]\
                            ['identifier'] or \
                            regexp.match(openid_url):
                        args['provider'] = key
                        break
        return_to += "login_method=openid&provider=%s" % (
            args['provider']
        )
        request_registration_data(request, args['provider'])
        if args['referer']:
            # NOTE(review): referer is appended unescaped -- presumably
            # already URL-safe from the washed argument; confirm.
            return_to += "&referer=%s" % args['referer']
        if request.shouldSendRedirect():
            redirect_url = request.redirectURL(
                trust_root,
                return_to,
                immediate = False)
            redirect_to_url(req, redirect_url)
        else:
            # Provider requires a POST: return an auto-submitting HTML form.
            form_html = request.htmlMarkup(trust_root,
                                           return_to,
                                           form_tag_attrs = {
                                               'id': 'openid_message'
                                           },
                                           immediate = False)
            return form_html
def oauth2(self, req, form):
    """
    Builds the authorization URL of the configured OAuth2 provider and
    redirects the user agent there; the provider redirects back to
    /youraccount/login?login_method=oauth2&provider=<name>.

    @param req: request object
    @param form: submitted form values; only 'provider' (plus the washed-in
        'ln') is used.
    @return: an error login page (string) on failure; otherwise redirects.
    """
    args = wash_urlargd(form, {'provider': (str, '')})
    # If either provider isn't activated or OAuth2 authentication is
    # disabled, redirect to login page.
    if not (args['provider'] in CFG_OAUTH2_PROVIDERS and
            CFG_OAUTH2_AUTHENTICATION):
        redirect_to_url(req, CFG_SITE_SECURE_URL + "/youraccount/login")
    # Load the right message language
    _ = gettext_set_language(args['ln'])

    def login_error_page(error_code):
        # Render the login page carrying the given error code in the query
        # string (shared by the failure branches below).
        return page(title = _("Login"),
                    body = webaccount.create_login_page_box(
                        '%s/youraccount/login?error=%s' % (
                            CFG_SITE_SECURE_URL, error_code),
                        args['ln']
                    ),
                    navtrail = """
<a class="navtrail" href="%s/youraccount/display?ln=%s">
""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description = "%s Personalize, Main page" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords = "%s , personalize" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid = 0,
                    req = req,
                    secure_page_p = 1,
                    language = args['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'youraccount')

    try:
        from rauth.service import OAuth2Service
    except ImportError:
        # 'Need to install rauth' error
        return login_error_page('oauth-rauth')
    provider_name = args['provider']
    # Load the configurations of the OAuth2 provider.  Local name chosen so
    # it does not shadow the module-level 'config' import.
    provider_config = CFG_OAUTH2_CONFIGURATIONS[provider_name]
    try:
        if not (provider_config['consumer_key'] and
                provider_config['consumer_secret']):
            raise Exception
        provider = OAuth2Service(
            name = provider_name,
            consumer_key = provider_config['consumer_key'],
            consumer_secret = provider_config['consumer_secret'],
            access_token_url = provider_config['access_token_url'],
            authorize_url = provider_config['authorize_url']
        )
    except Exception:
        # "OAuth service isn't configured" error
        return login_error_page('oauth-config')
    # Construct the authorization url.  Copy the configured parameters so
    # the shared configuration dict is not mutated across requests (the
    # original added 'redirect_uri' directly into the config's dict).
    params = dict(provider_config.get('authorize_parameters', {}))
    # Single string literal: the original built this URL with a backslash
    # continuation *inside* the literal, which embeds the continuation
    # line's leading whitespace into the redirect_uri.
    params['redirect_uri'] = '%s/youraccount/login?login_method=oauth2&provider=%s' % (
        CFG_SITE_SECURE_URL, args['provider'])
    url = provider.get_authorize_url(**params)
    redirect_to_url(req, url)
def oauth1(self, req, form):
    """
    Obtains an OAuth1 request token from the configured provider, stores
    it, and redirects the user agent to the provider's authorization page;
    the provider redirects back to
    /youraccount/login?login_method=oauth1&provider=<name>.
    """
    args = wash_urlargd(form, {'provider': (str, '')})
    # If either provider isn't activated or OAuth1 authentication is
    # disabled, redirect to login page.
    if not (args['provider'] in CFG_OAUTH1_PROVIDERS and
            CFG_OAUTH1_AUTHENTICATION):
        redirect_to_url(req, CFG_SITE_SECURE_URL + "/youraccount/login")
    # Load the right message language
    _ = gettext_set_language(args['ln'])
    try:
        # Deferred import: rauth is an optional dependency.
        from rauth.service import OAuth1Service
    except:
        # Return login page with 'Need to install rauth' error
        return page(title = _("Login"),
                    body = webaccount.create_login_page_box(
                        '%s/youraccount/login?error=oauth-rauth' % \
                        CFG_SITE_SECURE_URL,
                        args['ln']
                    ),
                    navtrail = """
<a class="navtrail" href="%s/youraccount/display?ln=%s">
""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description = "%s Personalize, Main page" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords = "%s , personalize" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid = 0,
                    req = req,
                    secure_page_p = 1,
                    language = args['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'youraccount')
    # Load the configurations of the OAuth1 provider
    config = CFG_OAUTH1_CONFIGURATIONS[args['provider']]
    try:
        # Missing consumer key/secret means the service is not configured;
        # the deliberate bare raise funnels into the same error page as any
        # other construction failure.
        if not (config['consumer_key'] and config['consumer_secret']):
            raise Exception
        provider = OAuth1Service(
            name = args['provider'],
            consumer_key = config['consumer_key'],
            consumer_secret = config['consumer_secret'],
            request_token_url = config['request_token_url'],
            access_token_url = config['access_token_url'],
            authorize_url = config['authorize_url'],
            header_auth = True
        )
    except:
        # Return login page with "OAuth service isn't configured" error
        return page(title = _("Login"),
                    body = webaccount.create_login_page_box(
                        '%s/youraccount/login?error=oauth-config' % \
                        CFG_SITE_SECURE_URL,
                        args['ln']
                    ),
                    navtrail = """
<a class="navtrail" href="%s/youraccount/display?ln=%s">
""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description = "%s Personalize, Main page" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords = "%s , personalize" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid = 0,
                    req = req,
                    secure_page_p = 1,
                    language = args['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'youraccount')
    try:
        # Obtain request token and its secret (network round-trip to the
        # provider; any failure is reported as a connection error).
        request_token, request_token_secret = \
            provider.get_request_token(
                method = 'GET',
                data = {
                    'oauth_callback': \
                        "%s/youraccount/login?login_method=oauth1&provider=%s" % (
                            CFG_SITE_SECURE_URL,
                            args['provider']
                        )
                }
            )
    except:
        # Return login page with 'Cannot connect the provider' error
        return page(title = _("Login"),
                    body = webaccount.create_login_page_box(
                        '%s/youraccount/login?error=connection-error' % \
                        CFG_SITE_SECURE_URL,
                        args['ln']
                    ),
                    navtrail = """
<a class="navtrail" href="%s/youraccount/display?ln=%s">
""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """</a>""",
                    description = "%s Personalize, Main page" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    keywords = "%s , personalize" % \
                        CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME),
                    uid = 0,
                    req = req,
                    secure_page_p = 1,
                    language = args['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'youraccount')
    # Construct the authorization url.
    authorize_parameters = config.get('authorize_parameters', {})
    authorize_url = provider.get_authorize_url(request_token,
                                               **authorize_parameters)
    # Save request token into database since it will be used in
    # authentication (the secret is needed again when the provider
    # redirects back with the verifier).
    query = """INSERT INTO oauth1_storage VALUES(%s, %s, NOW())"""
    params = (request_token, request_token_secret)
    run_sql(query, params)
    redirect_to_url(req, authorize_url)
class WebInterfaceYourTicketsPages(WebInterfaceDirectory):
    """Support for the /yourtickets URLs."""

    _exports = ['', 'display']

    def __call__(self, req, form):
        # No trailing slash on /yourtickets: behave like the index.
        self.index(req, form)

    def index(self, req, form):
        """Redirect to /yourtickets/display, preserving the query string."""
        unparsed_uri = req.unparsed_uri
        qstr = ""
        if '?' in unparsed_uri:
            # split('?', 1): the original unpacked split('?') into exactly
            # two names, which raises ValueError whenever the URI contains
            # more than one '?' character.
            dummy, qstr = unparsed_uri.split('?', 1)
            qstr = '?' + qstr
        redirect_to_url(req, '/yourtickets/display' + qstr)

    def display(self, req, form):
        """Show the tickets of the current user, paginated from 'start'."""
        argd = wash_urlargd(form, {'ln': (str, ''), 'start': (int, 1)})
        uid = webuser.getUid(req)
        ln = argd['ln']
        start = argd['start']
        _ = gettext_set_language(ln)
        body = bibcatalog_templates.tmpl_your_tickets(uid, ln, start)
        return page(title=_("Your tickets"),
                    body=body,
                    navtrail="""<a class="navtrail" href="%s/youraccount/display?ln=%s">""" % (CFG_SITE_SECURE_URL, argd['ln']) + _("Your Account") + """</a>""",
                    uid=uid,
                    req=req,
                    language=argd['ln'],
                    lastupdated=__lastupdated__,
                    secure_page_p=1)
class WebInterfaceYourGroupsPages(WebInterfaceDirectory):
    """Handlers for the /yourgroups/* pages: listing, creating, joining,
    leaving, editing and member management of user groups.

    Every handler follows the same shape: wash arguments, resolve the uid,
    refuse guests / restricted sites, then dispatch on which submit button
    was pressed and render the resulting body with page().
    """

    _exports = ['', 'display', 'create', 'join', 'leave', 'edit', 'members']

    def index(self, req, form):
        # Bare /yourgroups: go straight to the display page.
        redirect_to_url(req, '/yourgroups/display')

    def display(self, req, form):
        """
        Displays the groups the user is admin of
        and the groups the user is member of (but not admin).
        @param ln: language
        @return: the page for all the groups
        """
        # NOTE(review): wash_urlargd is assumed to inject the default 'ln'
        # key even though none is declared here -- confirm.
        argd = wash_urlargd(form, {})
        uid = webuser.getUid(req)
        # load the right message language
        _ = gettext_set_language(argd['ln'])
        if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return webuser.page_not_authorized(req, "../yourgroups/display",
                                               navmenuid='yourgroups')
        user_info = webuser.collect_user_info(req)
        if not user_info['precached_usegroups']:
            return webuser.page_not_authorized(req, "../", \
                text = _("You are not authorized to use groups."))
        body = webgroup.perform_request_groups_display(uid=uid,
                                                       ln=argd['ln'])
        return page(title = _("Your Groups"),
                    body = body,
                    navtrail = webgroup.get_navtrail(argd['ln']),
                    uid = uid,
                    req = req,
                    language = argd['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'yourgroups',
                    secure_page_p = 1)

    def create(self, req, form):
        """create(): interface for creating a new group
        @param group_name: name of the new webgroup. Must be filled
        @param group_description: description of the new webgroup. (optional)
        @param join_policy: join policy of the new webgroup. Must be chosen
        @param *button: which button was pressed
        @param ln: language
        @return: the compose page Create group
        """
        argd = wash_urlargd(form, {'group_name': (str, ""),
                                   'group_description': (str, ""),
                                   'join_policy': (str, ""),
                                   'create_button':(str, ""),
                                   'cancel':(str, "")
                                   })
        uid = webuser.getUid(req)
        # load the right message language
        _ = gettext_set_language(argd['ln'])
        if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return webuser.page_not_authorized(req, "../yourgroups/create",
                                               navmenuid='yourgroups')
        user_info = webuser.collect_user_info(req)
        if not user_info['precached_usegroups']:
            return webuser.page_not_authorized(req, "../", \
                text = _("You are not authorized to use groups."))
        if argd['cancel']:
            # NOTE(review): redirect_to_url presumably terminates the
            # handler (raises), otherwise execution would fall through.
            url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s'
            url %= argd['ln']
            redirect_to_url(req, url)
        if argd['create_button']:
            # Form was submitted: actually create the group.
            body = webgroup.perform_request_create_group(uid=uid,
                                                         group_name=argd['group_name'],
                                                         group_description=argd['group_description'],
                                                         join_policy=argd['join_policy'],
                                                         ln = argd['ln'])
        else:
            # First visit: render the empty creation form.
            body = webgroup.perform_request_input_create_group(group_name=argd['group_name'],
                                                               group_description=argd['group_description'],
                                                               join_policy=argd['join_policy'],
                                                               ln=argd['ln'])
        title = _("Create new group")
        return page(title = title,
                    body = body,
                    navtrail = webgroup.get_navtrail(argd['ln'], title),
                    uid = uid,
                    req = req,
                    language = argd['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'yourgroups',
                    secure_page_p = 1)

    def join(self, req, form):
        """join(): interface for joining a new group
        @param grpID: list of the group the user wants to become a member.
            The user must select only one group.
        @param group_name: will search for groups matching group_name
        @param *button: which button was pressed
        @param ln: language
        @return: the compose page Join group
        """
        argd = wash_urlargd(form, {'grpID':(list, []),
                                   'group_name':(str, ""),
                                   'find_button':(str, ""),
                                   'join_button':(str, ""),
                                   'cancel':(str, "")
                                   })
        uid = webuser.getUid(req)
        # load the right message language
        _ = gettext_set_language(argd['ln'])
        if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return webuser.page_not_authorized(req, "../yourgroups/join",
                                               navmenuid='yourgroups')
        user_info = webuser.collect_user_info(req)
        if not user_info['precached_usegroups']:
            return webuser.page_not_authorized(req, "../", \
                text = _("You are not authorized to use groups."))
        if argd['cancel']:
            url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s'
            url %= argd['ln']
            redirect_to_url(req, url)
        if argd['join_button']:
            # 'search' flags whether a group-name filter was in effect.
            search = 0
            if argd['group_name']:
                search = 1
            body = webgroup.perform_request_join_group(uid,
                                                       argd['grpID'],
                                                       argd['group_name'],
                                                       search,
                                                       argd['ln'])
        else:
            search = 0
            if argd['find_button']:
                search = 1
            body = webgroup.perform_request_input_join_group(uid,
                                                             argd['group_name'],
                                                             search,
                                                             ln=argd['ln'])
        title = _("Join New Group")
        return page(title = title,
                    body = body,
                    navtrail = webgroup.get_navtrail(argd['ln'], title),
                    uid = uid,
                    req = req,
                    language = argd['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'yourgroups',
                    secure_page_p = 1)

    def leave(self, req, form):
        """leave(): interface for leaving a group
        @param grpID: group the user wants to leave.
        @param group_name: name of the group the user wants to leave
        @param *button: which button was pressed
        @param confirmed: the user is first asked to confirm
        @param ln: language
        @return: the compose page Leave group
        """
        argd = wash_urlargd(form, {'grpID':(int, 0),
                                   'group_name':(str, ""),
                                   'leave_button':(str, ""),
                                   'cancel':(str, ""),
                                   'confirmed': (int, 0)
                                   })
        uid = webuser.getUid(req)
        # load the right message language
        _ = gettext_set_language(argd['ln'])
        if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return webuser.page_not_authorized(req, "../yourgroups/leave",
                                               navmenuid='yourgroups')
        user_info = webuser.collect_user_info(req)
        if not user_info['precached_usegroups']:
            return webuser.page_not_authorized(req, "../", \
                text = _("You are not authorized to use groups."))
        if argd['cancel']:
            url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s'
            url %= argd['ln']
            redirect_to_url(req, url)
        if argd['leave_button']:
            body = webgroup.perform_request_leave_group(uid,
                                                        argd['grpID'],
                                                        argd['confirmed'],
                                                        argd['ln'])
        else:
            body = webgroup.perform_request_input_leave_group(uid=uid,
                                                              ln=argd['ln'])
        title = _("Leave Group")
        return page(title = title,
                    body = body,
                    navtrail = webgroup.get_navtrail(argd['ln'], title),
                    uid = uid,
                    req = req,
                    language = argd['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'yourgroups',
                    secure_page_p = 1)

    def edit(self, req, form):
        """edit(): interface for editing a group
        @param grpID: group ID
        @param group_name: name of the new webgroup. Must be filled
        @param group_description: description of the new webgroup. (optional)
        @param join_policy: join policy of the new webgroup. Must be chosen
        @param update: button update group pressed
        @param delete: button delete group pressed
        @param cancel: button cancel pressed
        @param confirmed: the user is first asked to confirm before deleting
        @param ln: language
        @return: the main page displaying all the groups
        """
        argd = wash_urlargd(form, {'grpID': (int, 0),
                                   'update': (str, ""),
                                   'cancel': (str, ""),
                                   'delete': (str, ""),
                                   'group_name': (str, ""),
                                   'group_description': (str, ""),
                                   'join_policy': (str, ""),
                                   'confirmed': (int, 0)
                                   })
        uid = webuser.getUid(req)
        # load the right message language
        _ = gettext_set_language(argd['ln'])
        if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return webuser.page_not_authorized(req, "../yourgroups/display",
                                               navmenuid='yourgroups')
        user_info = webuser.collect_user_info(req)
        if not user_info['precached_usegroups']:
            return webuser.page_not_authorized(req, "../", \
                text = _("You are not authorized to use groups."))
        # Dispatch on which button was pressed; default is the edit form.
        if argd['cancel']:
            url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s'
            url %= argd['ln']
            redirect_to_url(req, url)
        elif argd['delete']:
            body = webgroup.perform_request_delete_group(uid=uid,
                                                         grpID=argd['grpID'],
                                                         confirmed=argd['confirmed'])
        elif argd['update']:
            body = webgroup.perform_request_update_group(uid= uid,
                                                         grpID=argd['grpID'],
                                                         group_name=argd['group_name'],
                                                         group_description=argd['group_description'],
                                                         join_policy=argd['join_policy'],
                                                         ln=argd['ln'])
        else:
            body = webgroup.perform_request_edit_group(uid=uid,
                                                       grpID=argd['grpID'],
                                                       ln=argd['ln'])
        title = _("Edit Group")
        return page(title = title,
                    body = body,
                    navtrail = webgroup.get_navtrail(argd['ln'], title),
                    uid = uid,
                    req = req,
                    language = argd['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'yourgroups',
                    secure_page_p = 1)

    def members(self, req, form):
        """members(): interface for managing members of a group
        @param grpID: group ID
        @param add_member: button add_member pressed
        @param remove_member: button remove_member pressed
        @param reject_member: button reject_member pressed
        @param member_id: ID of the existing member selected
        @param pending_member_id: ID of the pending member selected
        @param cancel: button cancel pressed
        @param ln: language
        @return: the same page with data updated
        """
        argd = wash_urlargd(form, {'grpID': (int, 0),
                                   'cancel': (str, ""),
                                   'add_member': (str, ""),
                                   'remove_member': (str, ""),
                                   'reject_member': (str, ""),
                                   'member_id': (int, 0),
                                   'pending_member_id': (int, 0)
                                   })
        uid = webuser.getUid(req)
        # load the right message language
        _ = gettext_set_language(argd['ln'])
        if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return webuser.page_not_authorized(req, "../yourgroups/display",
                                               navmenuid='yourgroups')
        user_info = webuser.collect_user_info(req)
        if not user_info['precached_usegroups']:
            return webuser.page_not_authorized(req, "../", \
                text = _("You are not authorized to use groups."))
        if argd['cancel']:
            url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s'
            url %= argd['ln']
            redirect_to_url(req, url)
        # Dispatch on which member-management button was pressed; default
        # is the member-management overview page.
        if argd['remove_member']:
            body = webgroup.perform_request_remove_member(uid=uid,
                                                          grpID=argd['grpID'],
                                                          member_id=argd['member_id'],
                                                          ln=argd['ln'])
        elif argd['reject_member']:
            body = webgroup.perform_request_reject_member(uid=uid,
                                                          grpID=argd['grpID'],
                                                          user_id=argd['pending_member_id'],
                                                          ln=argd['ln'])
        elif argd['add_member']:
            body = webgroup.perform_request_add_member(uid=uid,
                                                       grpID=argd['grpID'],
                                                       user_id=argd['pending_member_id'],
                                                       ln=argd['ln'])
        else:
            body = webgroup.perform_request_manage_member(uid=uid,
                                                          grpID=argd['grpID'],
                                                          ln=argd['ln'])
        title = _("Edit group members")
        return page(title = title,
                    body = body,
                    navtrail = webgroup.get_navtrail(argd['ln'], title),
                    uid = uid,
                    req = req,
                    language = argd['ln'],
                    lastupdated = __lastupdated__,
                    navmenuid = 'yourgroups',
                    secure_page_p = 1)
def wash_login_method(login_method):
    """
    Wash the login_method parameter that came from the web input form.

    @param login_method: wanted login_method value as submitted by the
        web input form.
    @type login_method: string
    @return: the washed value: login_method itself when it names a known
        external authentication method, otherwise the default 'Local'
        login method.
    @rtype: string
    @warning: Beware, 'Local' is hardcoded here!
    """
    return login_method if login_method in CFG_EXTERNAL_AUTHENTICATION \
        else 'Local'
diff --git a/invenio/legacy/webstyle/goto_webinterface.py b/invenio/legacy/webstyle/goto_webinterface.py
index 6931c21fc..eae691535 100644
--- a/invenio/legacy/webstyle/goto_webinterface.py
+++ b/invenio/legacy/webstyle/goto_webinterface.py
@@ -1,79 +1,79 @@
## This file is part of Invenio.
## Copyright (C) 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Implements persistent URLs
"""
import inspect
from invenio.config import CFG_SITE_URL
from invenio.webinterface_handler import WebInterfaceDirectory
from invenio.utils.apache import SERVER_RETURN, HTTP_NOT_FOUND
from invenio.utils.url import redirect_to_url
from invenio.modules.redirector.api import get_redirection_data
from invenio.webuser import collect_user_info
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
class WebInterfaceGotoPages(WebInterfaceDirectory):
    """Resolve persistent /goto/<component> URLs to redirection targets via
    plugins registered in the redirector module.

    NOTE: Python 2 only (`except Exception, err` syntax and two-argument
    ``map(None, ...)`` zipping below).
    """

    def _lookup(self, component, path):
        # Traversal hook: return (handler, remaining_path_components), or
        # (None, []) when no redirection is registered for `component`.
        try:
            redirection_data = get_redirection_data(component)
            goto_plugin = redirection_data['plugin']
            args, dummy_varargs, dummy_varkw, defaults = inspect.getargspec(goto_plugin)
            # Reverse both lists so positional args line up with their
            # defaults (defaults apply to the *last* arguments).
            args = args and list(args) or []
            args.reverse()
            defaults = defaults and list(defaults) or []
            defaults.reverse()
            params_to_pass = {}
            # Python 2 idiom: map(None, a, b) zips with None padding, so
            # every plugin argument gets an entry (None when it has no
            # default).
            for arg, default in map(None, args, defaults):
                params_to_pass[arg] = default

            def goto_handler(req, form):
                ## Let's put what is in the GET query
                for key, value in dict(form).items():
                    if key in params_to_pass:
                        params_to_pass[key] = str(value)
                ## Let's override the params_to_pass to the call with the
                ## arguments in the configuration
                configuration_parameters = redirection_data['parameters'] or {}
                params_to_pass.update(configuration_parameters)
                ## Let's add default parameters if the plugin expects them
                if 'component' in params_to_pass:
                    params_to_pass['component'] = component
                if 'path' in params_to_pass:
                    params_to_pass['path'] = path
                if 'user_info' in params_to_pass:
                    params_to_pass['user_info'] = collect_user_info(req)
                if 'req' in params_to_pass:
                    params_to_pass['req'] = req
                try:
                    new_url = goto_plugin(**params_to_pass)
                except Exception, err:
                    # A failing plugin is reported to the admin and mapped
                    # to a plain 404 for the user.
                    register_exception(req=req, alert_admin=True)
                    raise SERVER_RETURN(HTTP_NOT_FOUND)
                if new_url:
                    # Relative targets are anchored at the site URL.
                    if new_url.startswith('/'):
                        new_url = CFG_SITE_URL + new_url
                    redirect_to_url(req, new_url)
                else:
                    raise SERVER_RETURN(HTTP_NOT_FOUND)
            return goto_handler, []
        except ValueError:
            return None, []
diff --git a/invenio/legacy/websubmit/admin_engine.py b/invenio/legacy/websubmit/admin_engine.py
index 7ca8f33ee..3bb66b2a9 100644
--- a/invenio/legacy/websubmit/admin_engine.py
+++ b/invenio/legacy/websubmit/admin_engine.py
@@ -1,4246 +1,4246 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
import re
from os.path import split, basename, isfile
from os import access, F_OK, R_OK, getpid, rename, unlink
from time import strftime, localtime
from invenio.websubmitadmin_dblayer import *
from invenio.websubmitadmin_config import *
from invenio.legacy.websubmit.config import CFG_RESERVED_SUBMISSION_FILENAMES
from invenio.access_control_admin import acc_get_all_roles, acc_get_role_users, acc_delete_user_role
from invenio.config import CFG_SITE_LANG, CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR
from invenio.access_control_engine import acc_authorize_action
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.websubmitadmin_config import InvenioWebSubmitWarning
from invenio.base.i18n import gettext_set_language
import invenio.template
try:
websubmitadmin_templates = invenio.template.load('websubmitadmin')
except:
pass
## utility functions:
def is_adminuser(req, role):
    """check if user is a registered administrator. """
    # Delegates entirely to the access control engine; returns its
    # (auth_code, auth_message) tuple, where auth_code == 0 means the
    # request is authorized (see check_user below).
    return acc_authorize_action(req, role)
def check_user(req, role, adminarea=2, authorized=0):
    """Check that the request is allowed to act in `role`.

    Returns ("", msg) when the check passes (or `authorized` is non-zero,
    which bypasses it), and ("false", msg) when it fails.  `adminarea` is
    kept for interface compatibility; it is not used here.
    """
    auth_code, auth_message = is_adminuser(req, role)
    if authorized or auth_code == 0:
        return ("", auth_message)
    return ("false", auth_message)
def get_navtrail(ln=CFG_SITE_LANG):
    """gets the navtrail for the websubmit admin pages
    @param ln: language
    @return: HTML output
    """
    # Rendered by the websubmitadmin template object loaded at import time;
    # if that guarded invenio.template.load failed, this raises NameError.
    navtrail = websubmitadmin_templates.tmpl_navtrail(ln)
    return navtrail
def stringify_listvars(mylist):
    """Accept a list (or a list of lists) (or tuples).
    Convert each item in the list into a string, replacing None with the
    empty string "".

    @param mylist: A list/tuple of values, or a list/tuple of value
        list/tuples.
    @return: a list of string values, or a list of string-value lists
    """
    # Comprehensions instead of map/lambda: identical on Python 2 but,
    # unlike map(), still returns real lists on Python 3 (py3 map() is
    # lazy, so the original would silently return map objects there).
    string_list = []
    try:
        if type(mylist[0]) in (tuple, list):
            # Nested case: stringify each row into its own list.
            for row in mylist:
                string_list.append(["" if x is None else str(x) for x in row])
        else:
            string_list = ["" if x is None else str(x) for x in mylist]
    except IndexError:
        # Empty input: fall through and return the empty list.
        pass
    return string_list
def save_update_to_file(filepath, filecontent, notruncate=0, appendmode=0):
    """Save a string value to a file.
    Save will create a new file if the file does not exist. Mode can be set
    to truncate an older file or to refuse to create the file if it already
    exists. There is also a mode to "append" the string value to a file.

    @param filepath: (string) the full path to the file
    @param filecontent: (string) the content to be written to the file
    @param notruncate: (integer) should be 1 or 0, defaults to 0 (ZERO). If
        0, an existing file will be truncated; if 1, the file will not be
        written if it already exists
    @param appendmode: (integer) should be 1 or 0, defaults to 0 (ZERO). If
        1, data will be appended to the file if it exists; if 0, the file
        will be truncated (or not, depending on the notruncate mode)
    @return: None
    @exceptions raised:
        - InvenioWebSubmitAdminWarningIOError: when operations involving
          writing to file failed.
    """
    # Uses "except IOError:" (no bound name): the original's
    # "except IOError, e:" is Python-2-only syntax (a SyntaxError on
    # Python 3) and never used the bound exception anyway.
    ## sanity checking:
    if notruncate not in (0, 1):
        notruncate = 0
    if appendmode not in (0, 1):
        appendmode = 0

    (fpath, fname) = split(filepath)
    if fname == "":
        ## error opening file
        msg = """Unable to open filepath [%s] - couldn't determine a valid filename""" % (filepath,)
        raise InvenioWebSubmitAdminWarningIOError(msg)
    ## if fpath is not empty, append the trailing "/":
    if fpath != "":
        fpath += "/"
    if appendmode == 0:
        if notruncate != 0 and access("%s%s" % (fpath, fname), F_OK):
            ## in no-truncate mode, but file already exists!
            msg = """Unable to write to file [%s] in "no-truncate mode" because file already exists"""\
                  % (fname,)
            raise InvenioWebSubmitAdminWarningIOError(msg)
        ## Write to a uniquely-named temporary file first, then rename it
        ## over the target, so readers never observe a half-written file.
        tmpfname = "%s_%s_%s" % (fname, strftime("%Y%m%d%H%M%S", localtime()), getpid())
        ## open temp file for writing:
        try:
            fp = open("%s%s" % (fpath, tmpfname), "w")
        except IOError:
            ## cannot open file
            msg = """Unable to write to file [%s%s] - cannot open file for writing""" % (fpath, fname)
            raise InvenioWebSubmitAdminWarningIOError(msg)
        ## write contents to temp file:
        try:
            fp.write(filecontent)
            fp.flush()
            fp.close()
        except IOError:
            ## could not write to temp file
            msg = """Unable to write to file [%s]""" % (tmpfname,)
            ## remove the "temp file" (best-effort)
            try:
                fp.close()
                unlink("%s%s" % (fpath, tmpfname))
            except IOError:
                pass
            raise InvenioWebSubmitAdminWarningIOError(msg)
        ## rename temp file to final filename:
        try:
            rename("%s%s" % (fpath, tmpfname), "%s%s" % (fpath, fname))
        except OSError:
            ## couldn't rename the tmp file to final file name
            msg = """Unable to write to file [%s] - created temporary file [%s], but could not then rename it to [%s]"""\
                  % (fname, tmpfname, fname)
            raise InvenioWebSubmitAdminWarningIOError(msg)
    else:
        ## append mode:
        try:
            fp = open("%s%s" % (fpath, fname), "a")
        except IOError:
            ## cannot open file
            msg = """Unable to write to file [%s] - cannot open file for writing in append mode""" % (fname,)
            raise InvenioWebSubmitAdminWarningIOError(msg)
        ## write contents to file:
        try:
            fp.write(filecontent)
            fp.flush()
            fp.close()
        except IOError:
            ## could not write to file
            msg = """Unable to write to file [%s] in append mode""" % (fname,)
            ## close the file (best-effort)
            try:
                fp.close()
            except IOError:
                pass
            raise InvenioWebSubmitAdminWarningIOError(msg)
    return
def string_is_alphanumeric_including_underscore(txtstring):
    """Return 1 when txtstring consists solely of word characters
    (letters, digits, underscore) -- the empty string qualifies --
    and 0 otherwise."""
    if re.search(r'^\w*$', txtstring) is not None:
        return 1
    return 0
def function_name_is_valid(fname):
    """Return 1 when fname is a valid function name (an underscore or a
    letter followed by any number of word characters), 0 otherwise."""
    return 1 if re.search(r'^(_|[a-zA-Z])\w*$', fname) else 0
def wash_single_urlarg(urlarg, argreqdtype, argdefault, maxstrlen=None, minstrlen=None, truncatestr=0):
    """Wash a single argument according to some specifications.
    @param urlarg: the argument to be tested, as passed from the form/url, etc
    @param argreqdtype: (a python type) the type that the argument should conform to (argument required
     type)
    @param argdefault: the default value that should be returned for the argument in the case that it
     doesn't comply with the washing specifications
    @param maxstrlen: (integer) the maximum length for a string argument; defaults to None, which means
     that no maximum length is forced upon the string
    @param minstrlen: (integer) the minimum length for a string argument; defaults to None, which means
     that no minimum length is forced upon the string
    @param truncatestr: (integer) should be 1 or 0 (ZERO). A flag used to determine whether or not a string
     argument that overstretches the maximum length (if one is provided) should be truncated, or reset
     to the default for the argument. 0, means don't truncate and reset the argument; 1 means truncate
     the string.
    @return: the washed argument
    @exceptions raised:
       - ValueError: when it is not possible to cast an argument to the type passed as argreqdtype
    """
    ## sanity checking of the length constraints: anything that is not a
    ## positive integer is treated as "no constraint":
    if maxstrlen is not None and type(maxstrlen) is not int:
        maxstrlen = None
    elif maxstrlen is not None and maxstrlen < 1:
        ## BUGFIX: this condition was previously "maxstrlen is int and
        ## maxstrlen < 1", which compares the VALUE with the type object
        ## `int` and could therefore never be true - a non-positive
        ## maxstrlen (e.g. 0) was silently kept and made every string fail
        ## the maximum-length test.
        maxstrlen = None
    if minstrlen is not None and type(minstrlen) is not int:
        minstrlen = None
    elif minstrlen is not None and minstrlen < 1:
        ## BUGFIX: same "is int" defect as for maxstrlen above.
        minstrlen = None
    result = ""
    arg_dst_type = argreqdtype
    ## if no urlarg, return the default for that argument:
    if urlarg is None:
        result = argdefault
        return result
    ## get the type of the argument passed:
    arg_src_type = type(urlarg)
    value = urlarg
    # First, handle the case where we want all the results. In
    # this case, we need to ensure all the elements are strings,
    # and not Field instances.
    if arg_src_type in (list, tuple):
        if arg_dst_type is list:
            result = [str(x) for x in value]
            return result
        if arg_dst_type is tuple:
            result = tuple([str(x) for x in value])
            return result
        # in all the other cases, we are only interested in the
        # first value.
        value = value[0]
    # Maybe we already have what is expected? Then don't change
    # anything.
    if arg_src_type is arg_dst_type:
        result = value
        ## enforce the string-length constraints (strings only):
        if arg_dst_type is str and maxstrlen is not None and len(result) > maxstrlen:
            if truncatestr != 0:
                result = result[0:maxstrlen]
            else:
                result = argdefault
        elif arg_dst_type is str and minstrlen is not None and len(result) < minstrlen:
            result = argdefault
        return result
    if arg_dst_type in (str, int):
        try:
            result = arg_dst_type(value)
            if arg_dst_type is str and maxstrlen is not None and len(result) > maxstrlen:
                if truncatestr != 0:
                    result = result[0:maxstrlen]
                else:
                    result = argdefault
            elif arg_dst_type is str and minstrlen is not None and len(result) < minstrlen:
                result = argdefault
        except Exception:
            ## the cast failed (e.g. int("abc")) - fall back to the default.
            ## (Narrowed from a bare "except:" so SystemExit and
            ## KeyboardInterrupt are no longer swallowed.)
            result = argdefault
    elif arg_dst_type is tuple:
        result = (value,)
    elif arg_dst_type is list:
        result = [value]
    elif arg_dst_type is dict:
        result = {0: str(value)}
    else:
        raise ValueError('cannot cast form argument into type %r' % (arg_dst_type,))
    return result
## Internal Business-Logic functions
## Functions for managing collection order, etc:
def build_submission_collection_tree(collection_id, has_brother_above=0, has_brother_below=0):
    """Recursively build a tree of nested dictionaries describing a
    submission-collection: its name, the document-types attached to it and,
    recursively, its child submission-collections.
    @param collection_id: (integer) ID of the collection at the root of this (sub-)tree
    @param has_brother_above: (integer) 1 if a sibling collection sits above this one in score, else 0
    @param has_brother_below: (integer) 1 if a sibling collection sits below this one in score, else 0
    @return: (dictionary) the tree node built for this collection
    """
    ## resolve the collection's display name, with a fallback for unknown IDs:
    collection_name = get_collection_name(collection_id)
    if collection_name is None:
        collection_name = "Unknown Collection"
    node = {'collection_id'       : collection_id,       ## collection ID
            'collection_name'     : collection_name,     ## collection Name
            'collection_children' : [],                  ## 'collection' child nodes
            'doctype_children'    : [],                  ## 'doctype' children
            'has_brother_above'   : has_brother_above,   ## sibling collection above in score?
            'has_brother_below'   : has_brother_below,   ## sibling collection below in score?
           }
    ## attach a record for every document-type connected to this collection:
    for doctype in get_doctype_children_of_collection(collection_id):
        node['doctype_children'].append({'doctype_id'      : doctype[0],
                                         'doctype_lname'   : doctype[1],
                                         'catalogue_order' : doctype[2],
                                        })
    ## recurse into the child collections, flagging for each one whether it
    ## has siblings above/below it:
    children = get_collection_children_of_collection(collection_id)
    last_pos = len(children) - 1
    for pos, child in enumerate(children):
        node['collection_children'].append(
            build_submission_collection_tree(collection_id=child[0],
                                             has_brother_above=(1 if pos > 0 else 0),
                                             has_brother_below=(1 if pos < last_pos else 0)))
    return node
def _organise_submission_page_display_submission_tree(user_msg=""):
    """Build the "Organise WebSubmit Main Page" view: the full submission
    tree plus the pick-lists of submission-collections and document-types.
    @param user_msg: messages to show the user (list/tuple/str/unicode);
     anything else - or the empty string - is replaced by an empty list
    @return: (title, body) tuple for the page
    """
    title = "Organise WebSubmit Main Page"
    body = ""
    ## normalise user_msg (`unicode` is the Python 2 builtin):
    if user_msg == "" or type(user_msg) not in (list, tuple, str, unicode):
        user_msg = []
    ## the full submissions tree, rooted at the master collection (id 0):
    tree = build_submission_collection_tree(0)
    ## pick-list of all 'submission collections', headed by the top level:
    sub_col = [('0', 'Top Level')]
    for collection in get_details_of_all_submission_collections():
        sub_col.append((str(collection[0]), str(collection[1])))
    ## pick-list of all document types:
    doctypes = get_docid_docname_and_docid_alldoctypes()
    ## render the page:
    body = websubmitadmin_templates.tmpl_display_submission_page_organisation(
        submission_collection_tree=tree,
        submission_collections=sub_col,
        doctypes=doctypes,
        user_msg=user_msg)
    return (title, body)
def _delete_submission_collection(sbmcolid):
    """Recursively calls itself to delete a submission-collection and all of its
    attached children (and their children, etc) from the submission-tree.
    @param sbmcolid: (integer) - the ID of the submission-collection to be deleted.
    @return: None
    @Exceptions raised: InvenioWebSubmitAdminWarningDeleteFailed when it was not
    possible to delete the submission-collection or some of its children.
    """
    ## depth-first: remove every child collection before touching this one
    for collection_child in get_collection_children_of_collection(sbmcolid):
        _delete_submission_collection(collection_child[0])
    ## detach all document-types from this submission-collection:
    if delete_doctype_children_from_submission_collection(sbmcolid) != 0:
        raise InvenioWebSubmitAdminWarningDeleteFailed(
            "Unable to delete doctype children of submission-collection [%s]" % sbmcolid)
    ## remove this collection's entry from the sbmCOLLECTION_sbmCOLLECTION table:
    if delete_submission_collection_from_submission_tree(sbmcolid) != 0:
        raise InvenioWebSubmitAdminWarningDeleteFailed(
            "Unable to delete submission-collection [%s] from submission-tree" % sbmcolid)
    ## finally drop the collection's own details record:
    if delete_submission_collection_details(sbmcolid) != 0:
        raise InvenioWebSubmitAdminWarningDeleteFailed(
            "Unable to delete details of submission-collection [%s]" % sbmcolid)
    return
def perform_request_organise_submission_page(doctype="",
                                             sbmcolid="",
                                             catscore="",
                                             addsbmcollection="",
                                             deletesbmcollection="",
                                             addtosbmcollection="",
                                             adddoctypes="",
                                             movesbmcollectionup="",
                                             movesbmcollectiondown="",
                                             deletedoctypefromsbmcollection="",
                                             movedoctypeupinsbmcollection="",
                                             movedoctypedowninsbmcollection=""):
    """Dispatch handler used to organise the WebSubmit main submissions page.
    Exactly one management action is performed per call, selected by
    whichever "flag" parameter is non-empty together with the identifying
    parameters that action requires; afterwards the submission tree is
    re-displayed with a status message for the user.
    @param doctype: (string) ID of a document-type involved in the action
    @param sbmcolid: (string/integer) ID of a submission-collection involved in the action
    @param catscore: (string) catalogue score (position) of a doctype within a collection
     -- presumably; confirm against the callers
    @param addsbmcollection: name for a new submission-collection to create
    @param deletesbmcollection: flag: delete the collection identified by sbmcolid
    @param addtosbmcollection: ID of the parent collection for an add operation
    @param adddoctypes: one doctype ID (string) or a list of them to attach
    @param movesbmcollectionup: flag: move collection sbmcolid up in score
    @param movesbmcollectiondown: flag: move collection sbmcolid down in score
    @param deletedoctypefromsbmcollection: flag: detach doctype from sbmcolid at catscore
    @param movedoctypeupinsbmcollection: flag: move doctype up within sbmcolid
    @param movedoctypedowninsbmcollection: flag: move doctype down within sbmcolid
    @return: tuple containing "title" (title of page) and "body" (page body)
    """
    user_msg = []
    body = ""
    ## NOTE: each branch below tests that none of the parameters it needs is
    ## the empty string; the first matching branch wins.
    if "" not in (deletedoctypefromsbmcollection, sbmcolid, catscore, doctype):
        ## delete a document type from its position in the tree
        error_code = delete_doctype_from_position_on_submission_page(doctype, sbmcolid, catscore)
        if error_code == 0:
            ## doctype deleted - now normalize scores of remaining doctypes:
            normalize_scores_of_doctype_children_for_submission_collection(sbmcolid)
            user_msg.append("Document type successfully deleted from submissions tree")
        else:
            user_msg.append("Unable to delete document type from submission-collection")
        ## display submission-collections:
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (deletesbmcollection, sbmcolid):
        ## try to delete the submission-collection (and all of its children) from the tree:
        try:
            _delete_submission_collection(sbmcolid)
            user_msg.append("Submission-collection successfully deleted from submissions tree")
        except InvenioWebSubmitAdminWarningDeleteFailed, excptn:
            user_msg.append(str(excptn))
        ## re-display submission-collections:
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (movedoctypedowninsbmcollection, sbmcolid, doctype, catscore):
        ## move a doctype down in order for a submission-collection:
        ## normalize scores of all doctype-children of the submission-collection:
        normalize_scores_of_doctype_children_for_submission_collection(sbmcolid)
        ## swap this doctype with that below it:
        ## Get score of doctype to move:
        score_doctype_to_move = get_catalogue_score_of_doctype_child_of_submission_collection(sbmcolid, doctype)
        ## Get score of the doctype brother directly below the doctype to be moved:
        score_brother_below = get_score_of_next_doctype_child_below(sbmcolid, score_doctype_to_move)
        if None in (score_doctype_to_move, score_brother_below):
            ## nothing below it (or doctype unknown) - movement impossible:
            user_msg.append("Unable to move document type down")
        else:
            ## update the brother below the doctype to be moved to have a score the same as the doctype to be moved:
            update_score_of_doctype_child_of_submission_collection_at_scorex(sbmcolid, score_brother_below, score_doctype_to_move)
            ## Update the doctype to be moved to have a score of the brother directly below it:
            update_score_of_doctype_child_of_submission_collection_with_doctypeid_and_scorex(sbmcolid,
                                                                                             doctype,
                                                                                             score_doctype_to_move,
                                                                                             score_brother_below)
            user_msg.append("Document type moved down")
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (movedoctypeupinsbmcollection, sbmcolid, doctype, catscore):
        ## move a doctype up in order for a submission-collection:
        ## normalize scores of all doctype-children of the submission-collection:
        normalize_scores_of_doctype_children_for_submission_collection(sbmcolid)
        ## swap this doctype with that above it:
        ## Get score of doctype to move:
        score_doctype_to_move = get_catalogue_score_of_doctype_child_of_submission_collection(sbmcolid, doctype)
        ## Get score of the doctype brother directly above the doctype to be moved:
        score_brother_above = get_score_of_previous_doctype_child_above(sbmcolid, score_doctype_to_move)
        if None in (score_doctype_to_move, score_brother_above):
            ## nothing above it (or doctype unknown) - movement impossible:
            user_msg.append("Unable to move document type up")
        else:
            ## update the brother above the doctype to be moved to have a score the same as the doctype to be moved:
            update_score_of_doctype_child_of_submission_collection_at_scorex(sbmcolid, score_brother_above, score_doctype_to_move)
            ## Update the doctype to be moved to have a score of the brother directly above it:
            update_score_of_doctype_child_of_submission_collection_with_doctypeid_and_scorex(sbmcolid,
                                                                                             doctype,
                                                                                             score_doctype_to_move,
                                                                                             score_brother_above)
            user_msg.append("Document type moved up")
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (movesbmcollectiondown, sbmcolid):
        ## move a submission-collection down in order:
        ## Sanity checking (sbmcolid arrives as a string from the form):
        try:
            int(sbmcolid)
        except ValueError:
            sbmcolid = 0
        if int(sbmcolid) != 0:
            ## Get father ID of submission-collection:
            sbmcolidfather = get_id_father_of_collection(sbmcolid)
            if sbmcolidfather is None:
                user_msg.append("Unable to move submission-collection downwards")
            else:
                ## normalize scores of all collection-children of the father submission-collection:
                normalize_scores_of_collection_children_of_collection(sbmcolidfather)
                ## swap this collection with the one below it:
                ## get the score of the collection to move:
                score_col_to_move = get_score_of_collection_child_of_submission_collection(sbmcolidfather, sbmcolid)
                ## get the score of the collection brother directly below the collection to be moved:
                score_brother_below = get_score_of_next_collection_child_below(sbmcolidfather, score_col_to_move)
                if None in (score_col_to_move, score_brother_below):
                    ## Invalid movement
                    user_msg.append("Unable to move submission collection downwards")
                else:
                    ## update the brother below the collection to be moved to have a score the same as the collection to be moved:
                    update_score_of_collection_child_of_submission_collection_at_scorex(sbmcolidfather,
                                                                                        score_brother_below,
                                                                                        score_col_to_move)
                    ## Update the collection to be moved to have a score of the brother directly below it:
                    update_score_of_collection_child_of_submission_collection_with_colid_and_scorex(sbmcolidfather,
                                                                                                    sbmcolid,
                                                                                                    score_col_to_move,
                                                                                                    score_brother_below)
                    user_msg.append("Submission-collection moved downwards")
        else:
            ## cannot move the master (0) collection
            user_msg.append("Unable to move submission-collection downwards")
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (movesbmcollectionup, sbmcolid):
        ## move a submission-collection up in order:
        ## Sanity checking (sbmcolid arrives as a string from the form):
        try:
            int(sbmcolid)
        except ValueError:
            sbmcolid = 0
        if int(sbmcolid) != 0:
            ## Get father ID of submission-collection:
            sbmcolidfather = get_id_father_of_collection(sbmcolid)
            if sbmcolidfather is None:
                user_msg.append("Unable to move submission-collection upwards")
            else:
                ## normalize scores of all collection-children of the father submission-collection:
                normalize_scores_of_collection_children_of_collection(sbmcolidfather)
                ## swap this collection with the one above it:
                ## get the score of the collection to move:
                score_col_to_move = get_score_of_collection_child_of_submission_collection(sbmcolidfather, sbmcolid)
                ## get the score of the collection brother directly above the collection to be moved:
                score_brother_above = get_score_of_previous_collection_child_above(sbmcolidfather, score_col_to_move)
                if None in (score_col_to_move, score_brother_above):
                    ## Invalid movement
                    user_msg.append("Unable to move submission collection upwards")
                else:
                    ## update the brother above the collection to be moved to have a score the same as the collection to be moved:
                    update_score_of_collection_child_of_submission_collection_at_scorex(sbmcolidfather,
                                                                                        score_brother_above,
                                                                                        score_col_to_move)
                    ## Update the collection to be moved to have a score of the brother directly above it:
                    update_score_of_collection_child_of_submission_collection_with_colid_and_scorex(sbmcolidfather,
                                                                                                    sbmcolid,
                                                                                                    score_col_to_move,
                                                                                                    score_brother_above)
                    user_msg.append("Submission-collection moved upwards")
        else:
            ## cannot move the master (0) collection
            user_msg.append("Unable to move submission-collection upwards")
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (addsbmcollection, addtosbmcollection):
        ## Add a submission-collection, attached to a submission-collection:
        ## check that the collection to attach to exists (ID 0 is the
        ## implicit top-level collection and is always considered valid):
        parent_ok = 0
        if int(addtosbmcollection) != 0:
            parent_name = get_collection_name(addtosbmcollection)
            if parent_name is not None:
                parent_ok = 1
        else:
            parent_ok = 1
        if parent_ok != 0:
            ## create the new collection:
            id_son = insert_submission_collection(addsbmcollection)
            ## get the maximum catalogue score of the existing collection children:
            max_child_score = \
                get_maximum_catalogue_score_of_collection_children_of_submission_collection(addtosbmcollection)
            ## add it to the collection, at a higher score than the others have:
            new_score = max_child_score + 1
            insert_collection_child_for_submission_collection(addtosbmcollection, id_son, new_score)
            user_msg.append("Submission-collection added to submissions tree")
        else:
            ## Parent submission-collection does not exist:
            user_msg.append("Unable to add submission-collection - parent unknown")
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    elif "" not in (adddoctypes, addtosbmcollection):
        ## Add document type(s) to a submission-collection:
        ## a single selection arrives as a plain string - normalise to a list:
        if type(adddoctypes) == str:
            adddoctypes = [adddoctypes,]
        ## Does submission-collection exist?
        num_collections_sbmcolid = get_number_of_rows_for_submission_collection(addtosbmcollection)
        if num_collections_sbmcolid > 0:
            for doctypeid in adddoctypes:
                ## Check that Doctype exists:
                num_doctypes_doctypeid = get_number_doctypes_docid(doctypeid)
                if num_doctypes_doctypeid < 1:
                    ## Cannot connect an unknown doctype:
                    user_msg.append("Unable to connect unknown document-type [%s] to a submission-collection" \
                                    % doctypeid)
                    continue
                else:
                    ## insert the submission-collection/doctype link:
                    ## get the maximum catalogue score of the existing doctype children:
                    max_child_score = \
                        get_maximum_catalogue_score_of_doctype_children_of_submission_collection(addtosbmcollection)
                    ## add it to the new doctype, at a higher score than the others have:
                    new_score = max_child_score + 1
                    insert_doctype_child_for_submission_collection(addtosbmcollection, doctypeid, new_score)
                    user_msg.append("Document-type added to submissions tree")
        else:
            ## submission-collection didn't exist
            user_msg.append("The selected submission-collection doesn't seem to exist")
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    else:
        ## default action - display submission-collections:
        (title, body) = _organise_submission_page_display_submission_tree(user_msg=user_msg)
    return (title, body)
## Functions for adding new catalgue to DB:
def _add_new_action(actid, actname, working_dir, status_text):
    """Insert the details of a new action into the websubmit system database.
    @param actid: unique action id (sactname) - stored upper-cased
    @param actname: action name (lactname)
    @param working_dir: directory action works from (dir)
    @param status_text: text string indicating action status (statustext)
    @return: the error code returned by insert_action_details
    """
    ## all values are coerced to strings; the action ID is upper-cased:
    actid = str(actid).upper()
    return insert_action_details(actid, str(actname), str(working_dir), str(status_text))
def perform_request_add_function(funcname=None, funcdescr=None, funcaddcommit=""):
    """An interface for adding a new WebSubmit function.
    If the form fields are filled, insert the new function into the WebSubmit
    database; otherwise display the Web form prompting for the details.
    @param funcname: name of the new function
    @param funcdescr: description of the new function
    @param funcaddcommit: if non-empty, attempt the DB commit
    @return: tuple containing "title" (title of page), body (page body).
    """
    user_msg = []
    body = ""
    title = "Create New WebSubmit Function"
    commit_error = 0
    ## wash the function name: at most 40 characters, identifier-shaped:
    if funcname is None:
        funcname = ""
    else:
        try:
            funcname = wash_single_urlarg(urlarg=funcname, argreqdtype=str,
                                          argdefault="", maxstrlen=40, minstrlen=1)
        except ValueError:
            funcname = ""
        else:
            if function_name_is_valid(fname=funcname) == 0:
                funcname = ""
    ## wash the description (free text, no length constraints):
    if funcdescr is None:
        funcdescr = ""
    else:
        try:
            funcdescr = wash_single_urlarg(urlarg=funcdescr, argreqdtype=str, argdefault="")
        except ValueError:
            funcdescr = ""
    ## process request:
    if funcaddcommit != "" and funcaddcommit is not None:
        if funcname == "":
            funcname = ""
            user_msg.append("""Function name is mandatory and must be a string with no more than 40 characters""")
            user_msg.append("""It must contain only alpha-numeric and underscore characters, beginning with a """\
                            """letter or underscore""")
            commit_error = 1
        if commit_error != 0:
            ## invalid name - redisplay the form with the warnings, no DB commit:
            body = websubmitadmin_templates.tmpl_display_addfunctionform(funcdescr=funcdescr, user_msg=user_msg)
            return (title, body)
        ## Add a new function definition - IF it is not already present
        err_code = insert_function_details(funcname, funcdescr)
        ## Handle error code - redisplay form with warning about no DB commit, or display with options
        ## to edit function:
        if err_code == 0:
            user_msg.append("""'%s' Function Added to WebSubmit""" % (funcname,))
            all_function_parameters = get_distinct_paramname_all_websubmit_function_parameters()
            body = websubmitadmin_templates.tmpl_display_addfunctionform(funcname=funcname,
                                                                         funcdescr=funcdescr,
                                                                         all_websubmit_func_parameters=all_function_parameters,
                                                                         perform_act="functionedit",
                                                                         user_msg=user_msg)
        else:
            ## Could not commit function to WebSubmit DB - redisplay form with function description:
            user_msg.append("""Could Not Add '%s' Function to WebSubmit""" % (funcname,))
            body = websubmitadmin_templates.tmpl_display_addfunctionform(funcdescr=funcdescr, user_msg=user_msg)
    else:
        ## Display Web form for new function addition:
        body = websubmitadmin_templates.tmpl_display_addfunctionform()
    return (title, body)
def perform_request_add_action(actid=None, actname=None, working_dir=None, status_text=None, actcommit=""):
    """An interface for the addition of a new WebSubmit action.
    If form fields filled, will insert new action into WebSubmit database, else will display
    web form prompting for action details.
    @param actid: unique id for new action (washed to exactly 3 alphanumeric/underscore characters)
    @param actname: name of new action
    @param working_dir: action working directory for WebSubmit core
    @param status_text: status text displayed at end of action
    @param actcommit: if non-empty, attempt the DB commit of the new action
    @return: tuple containing "title" (title of page), body (page body).
    """
    user_msg = []
    body = ""
    title = "Create New WebSubmit Action"
    commit_error=0
    ## wash args (each failed wash degrades the value to the empty string):
    if actid is not None:
        try:
            actid = wash_single_urlarg(urlarg=actid, argreqdtype=str, argdefault="", maxstrlen=3, minstrlen=3)
            if string_is_alphanumeric_including_underscore(txtstring=actid) == 0:
                actid = ""
        except ValueError, e:
            actid = ""
    else:
        actid = ""
    if actname is not None:
        try:
            actname = wash_single_urlarg(urlarg=actname, argreqdtype=str, argdefault="")
        except ValueError, e:
            actname = ""
    else:
        actname = ""
    if working_dir is not None:
        try:
            working_dir = wash_single_urlarg(urlarg=working_dir, argreqdtype=str, argdefault="")
        except ValueError, e:
            working_dir = ""
    else:
        working_dir = ""
    if status_text is not None:
        try:
            status_text = wash_single_urlarg(urlarg=status_text, argreqdtype=str, argdefault="")
        except ValueError, e:
            status_text = ""
    else:
        status_text = ""
    ## process request:
    if actcommit != "" and actcommit is not None:
        ## mandatory-field validation before committing:
        if actid in ("", None):
            actid = ""
            user_msg.append("""Action ID is mandatory and must be a 3 letter string""")
            commit_error = 1
        if actname in ("", None):
            actname = ""
            user_msg.append("""Action description is mandatory""")
            commit_error = 1
        if commit_error != 0:
            ## don't commit - just re-display page with message to user
            body = websubmitadmin_templates.tmpl_display_addactionform(actid=actid, actname=actname, working_dir=working_dir,\
                                                                       status_text=status_text, user_msg=user_msg)
            return (title, body)
        ## Commit new action to WebSubmit DB:
        err_code = _add_new_action(actid,actname,working_dir,status_text)
        ## Handle error code - redisplay form with warning about no DB commit, or move to list
        ## of actions
        if err_code == 0:
            ## Action added: show page listing WebSubmit actions
            ## NOTE(review): unlike the other handlers in this file, user_msg
            ## is REPLACED by a plain string here rather than appended to the
            ## list - presumably the template accepts both forms; confirm.
            user_msg = """'%s' Action Added to WebSubmit""" % (actid,)
            all_actions = get_actid_actname_allactions()
            body = websubmitadmin_templates.tmpl_display_allactions(all_actions,user_msg=user_msg)
            title = "Available WebSubmit Actions"
        else:
            ## Could not commit action to WebSubmit DB redisplay form with completed details and error message
            ## warnings.append(('ERR_WEBSUBMIT_ADMIN_ADDACTIONFAILDUPLICATE',actid) ## TODO
            user_msg = """Could Not Add '%s' Action to WebSubmit""" % (actid,)
            body = websubmitadmin_templates.tmpl_display_addactionform(actid=actid, actname=actname, working_dir=working_dir, \
                                                                       status_text=status_text, user_msg=user_msg)
    else:
        ## Display Web form for new action details:
        body = websubmitadmin_templates.tmpl_display_addactionform()
    return (title, body)
def perform_request_add_jscheck(chname=None, chdesc=None, chcommit=""):
    """An interface for the addition of a new WebSubmit JavaScript Check, as used on form elements.
    If form fields filled, will insert new Check into WebSubmit database, else will display
    Web form prompting for Check details.
    @param chname: unique id/name for new Check
    @param chdesc: description (JavaScript code body) of new Check
    @param chcommit: if non-empty, attempt the DB commit
    @return: tuple containing "title" (title of page), body (page body).
    """
    user_msg = []
    body = ""
    title = "Create New WebSubmit Checking Function"
    commit_error = 0
    ## wash the check name: at most 15 characters, identifier-shaped:
    if chname is None:
        chname = ""
    else:
        try:
            chname = wash_single_urlarg(urlarg=chname, argreqdtype=str,
                                        argdefault="", maxstrlen=15, minstrlen=1)
        except ValueError:
            chname = ""
        else:
            if function_name_is_valid(fname=chname) == 0:
                chname = ""
    ## wash the check description (the JavaScript body itself):
    if chdesc is None:
        chdesc = ""
    else:
        try:
            chdesc = wash_single_urlarg(urlarg=chdesc, argreqdtype=str, argdefault="")
        except ValueError:
            chdesc = ""
    ## process request:
    if chcommit != "" and chcommit is not None:
        if chname in ("", None):
            chname = ""
            user_msg.append("""Check name is mandatory and must be a string with no more than 15 characters""")
            user_msg.append("""It must contain only alpha-numeric and underscore characters, beginning with a """\
                            """letter or underscore""")
            commit_error = 1
        if commit_error != 0:
            ## invalid name - redisplay the form with the warnings, no DB commit:
            body = websubmitadmin_templates.tmpl_display_addjscheckform(chname=chname, chdesc=chdesc, user_msg=user_msg)
            return (title, body)
        ## Commit new check to WebSubmit DB:
        err_code = insert_jscheck_details(chname, chdesc)
        if err_code == 0:
            ## committed - show the page listing all WebSubmit JS Checks:
            user_msg.append("""'%s' Checking Function Added to WebSubmit""" % (chname,))
            all_jschecks = get_chname_alljschecks()
            body = websubmitadmin_templates.tmpl_display_alljschecks(all_jschecks, user_msg=user_msg)
            title = "Available WebSubmit Checking Functions"
        else:
            ## commit failed - redisplay the form with the entered details:
            user_msg.append("""Could Not Add '%s' Checking Function to WebSubmit""" % (chname,))
            body = websubmitadmin_templates.tmpl_display_addjscheckform(chname=chname, chdesc=chdesc, user_msg=user_msg)
    else:
        ## Display Web form for new check details:
        body = websubmitadmin_templates.tmpl_display_addjscheckform()
    return (title, body)
def perform_request_add_element(elname=None, elmarccode=None, eltype=None, elsize=None, \
                                elrows=None, elcols=None, elmaxlength=None, elval=None, \
                                elfidesc=None, elmodifytext=None, elcommit=""):
    """An interface for adding a new ELEMENT to the WebSubmit DB.
    @param elname: (string) element name.
    @param elmarccode: (string) element's MARC code.
    @param eltype: (character) element type.
    @param elsize: (integer) element size.
    @param elrows: (integer) number of rows in element.
    @param elcols: (integer) number of columns in element.
    @param elmaxlength: (integer) maximum length of element
    @param elval: (string) default value of element
    @param elfidesc: (string) description of element
    @param elmodifytext: (string) modification text of element
    @param elcommit: (string) If this value is not empty, attempt to commit element details to WebSubmit DB
    @return: tuple containing "title" (title of page), body (page body).
    """
    def _washed(rawval, reqdtype, **lengthconstraints):
        ## Wash one form value; a missing value (None) or a failed cast
        ## degrades to the empty string, as the rest of this handler expects.
        if rawval is None:
            return ""
        try:
            return wash_single_urlarg(urlarg=rawval, argreqdtype=reqdtype,
                                      argdefault="", **lengthconstraints)
        except ValueError:
            return ""
    user_msg = []
    body = ""
    title = "Create New WebSubmit Element"
    commit_error = 0
    ## wash args:
    elname = _washed(elname, str, maxstrlen=15, minstrlen=1)
    if string_is_alphanumeric_including_underscore(txtstring=elname) == 0:
        elname = ""
    elmarccode = _washed(elmarccode, str)
    eltype = _washed(eltype, str, maxstrlen=1, minstrlen=1)
    elsize = _washed(elsize, int)
    elrows = _washed(elrows, int)
    elcols = _washed(elcols, int)
    elmaxlength = _washed(elmaxlength, int)
    elval = _washed(elval, str)
    elfidesc = _washed(elfidesc, str)
    elmodifytext = _washed(elmodifytext, str)
    ## process request:
    if elcommit != "" and elcommit is not None:
        ## mandatory-field validation before committing:
        if elname == "":
            elname = ""
            user_msg.append("""The element name is mandatory and must be a string with no more than 15 characters""")
            user_msg.append("""It must contain only alpha-numeric and underscore characters""")
            commit_error = 1
        if eltype == "" or eltype not in ("D", "F", "H", "I", "R", "S", "T"):
            eltype = ""
            user_msg.append("""The element type is mandatory and must be selected from the list""")
            commit_error = 1
        if commit_error != 0:
            ## don't commit - just re-display page with message to user
            body = websubmitadmin_templates.tmpl_display_addelementform(elname=elname,
                                                                        elmarccode=elmarccode,
                                                                        eltype=eltype,
                                                                        elsize=str(elsize),
                                                                        elrows=str(elrows),
                                                                        elcols=str(elcols),
                                                                        elmaxlength=str(elmaxlength),
                                                                        elval=elval,
                                                                        elfidesc=elfidesc,
                                                                        elmodifytext=elmodifytext,
                                                                        user_msg=user_msg,
                                                                       )
            return (title, body)
        ## Commit new element description to WebSubmit DB:
        err_code = insert_element_details(elname=elname, elmarccode=elmarccode, eltype=eltype,
                                          elsize=elsize, elrows=elrows, elcols=elcols,
                                          elmaxlength=elmaxlength, elval=elval, elfidesc=elfidesc,
                                          elmodifytext=elmodifytext)
        if err_code == 0:
            ## Element added: show page listing WebSubmit elements
            user_msg.append("""'%s' Element Added to WebSubmit""" % (elname,))
            if elname in CFG_RESERVED_SUBMISSION_FILENAMES:
                user_msg.append("""WARNING: '%s' is a reserved name. Check WebSubmit admin guide to be aware of possible side-effects.""" % elname)
            title = "Available WebSubmit Elements"
            all_elements = get_elename_allelements()
            body = websubmitadmin_templates.tmpl_display_allelements(all_elements, user_msg=user_msg)
        else:
            ## Could not commit element to WebSubmit DB: redisplay form with completed details and error message
            user_msg.append("""Could Not Add '%s' Element to WebSubmit""" % (elname,))
            body = websubmitadmin_templates.tmpl_display_addelementform(elname=elname,
                                                                        elmarccode=elmarccode,
                                                                        eltype=eltype,
                                                                        elsize=str(elsize),
                                                                        elrows=str(elrows),
                                                                        elcols=str(elcols),
                                                                        elmaxlength=str(elmaxlength),
                                                                        elval=elval,
                                                                        elfidesc=elfidesc,
                                                                        elmodifytext=elmodifytext,
                                                                        user_msg=user_msg,
                                                                       )
    else:
        ## Display Web form for new element details:
        body = websubmitadmin_templates.tmpl_display_addelementform()
    return (title, body)
def perform_request_edit_element(elname, elmarccode=None, eltype=None, elsize=None, \
                                 elrows=None, elcols=None, elmaxlength=None, elval=None, \
                                 elfidesc=None, elmodifytext=None, elcommit=""):
    """An interface for the editing and updating the details of a WebSubmit ELEMENT.
    @param elname: element name.
    @param elmarccode: element's MARC code.
    @param eltype: element type.
    @param elsize: element size.
    @param elrows: number of rows in element.
    @param elcols: number of columns in element.
    @param elmaxlength: maximum length of element
    @param elval: default value of element
    @param elfidesc: description of element
    @param elmodifytext: modification text of element
    @param elcommit: If this value is not empty, attempt to commit element details to WebSubmit DB
    @return: tuple containing "title" (title of page), body (page body).
    """
    def _wash(argval, argreqdtype, **washargs):
        ## Wash one request argument; None or an un-washable value becomes "".
        if argval is None:
            return ""
        try:
            return wash_single_urlarg(urlarg=argval, argreqdtype=argreqdtype,
                                      argdefault="", **washargs)
        except ValueError:
            return ""
    user_msg = []
    body = ""
    title = "Edit WebSubmit Element"
    commit_error = 0
    ## wash args (every argument that fails washing is reduced to ""):
    elname = _wash(elname, str, maxstrlen=15, minstrlen=1)
    if elname != "" and string_is_alphanumeric_including_underscore(txtstring=elname) == 0:
        ## element names must be alphanumeric/underscore only:
        elname = ""
    elmarccode = _wash(elmarccode, str)
    eltype = _wash(eltype, str, maxstrlen=1, minstrlen=1)
    elsize = _wash(elsize, int)
    elrows = _wash(elrows, int)
    elcols = _wash(elcols, int)
    elmaxlength = _wash(elmaxlength, int)
    elval = _wash(elval, str)
    elfidesc = _wash(elfidesc, str)
    elmodifytext = _wash(elmodifytext, str)
    ## process request:
    if elcommit != "" and elcommit is not None:
        if elname == "":
            elname = ""
            user_msg.append("""Invalid Element Name!""")
            commit_error = 1
        if eltype == "" or eltype not in ("D", "F", "H", "I", "R", "S", "T"):
            eltype = ""
            user_msg.append("""Invalid Element Type!""")
            commit_error = 1
        if commit_error != 0:
            ## don't commit - just re-display page with message to user
            all_elements = get_elename_allelements()
            user_msg.append("""Could Not Update Element""")
            title = "Available WebSubmit Elements"
            body = websubmitadmin_templates.tmpl_display_allelements(all_elements, user_msg=user_msg)
            return (title, body)
        ## Commit updated element description to WebSubmit DB:
        err_code = update_element_details(elname=elname, elmarccode=elmarccode, eltype=eltype, \
                                          elsize=elsize, elrows=elrows, elcols=elcols, \
                                          elmaxlength=elmaxlength, elval=elval, elfidesc=elfidesc, \
                                          elmodifytext=elmodifytext)
        if err_code == 0:
            ## Element Updated: Show All Element Details Again
            user_msg.append("""'%s' Element Updated""" % (elname,))
            ## Get submission page usage of element:
            el_use = get_doctype_action_pagenb_for_submissions_using_element(elname)
            element_dets = get_element_details(elname)
            element_dets = stringify_listvars(element_dets)
            ## Take elements from results tuple:
            (elmarccode, eltype, elsize, elrows, elcols, elmaxlength, \
             elval, elfidesc, elcd, elmd, elmodifytext) = element_dets[0][0:11]
            ## Pass to template:
            body = websubmitadmin_templates.tmpl_display_addelementform(elname=elname,
                                                                        elmarccode=elmarccode,
                                                                        eltype=eltype,
                                                                        elsize=elsize,
                                                                        elrows=elrows,
                                                                        elcols=elcols,
                                                                        elmaxlength=elmaxlength,
                                                                        elval=elval,
                                                                        elfidesc=elfidesc,
                                                                        elcd=elcd,
                                                                        elmd=elmd,
                                                                        elmodifytext=elmodifytext,
                                                                        perform_act="elementedit",
                                                                        user_msg=user_msg,
                                                                        el_use_tuple=el_use
                                                                        )
        else:
            ## Could Not Update Element: Maybe Key Violation, or Invalid elname? Redisplay all elements.
            ## TODO : LOGGING
            all_elements = get_elename_allelements()
            user_msg.append("""Could Not Update Element '%s'""" % (elname,))
            title = "Available WebSubmit Elements"
            body = websubmitadmin_templates.tmpl_display_allelements(all_elements, user_msg=user_msg)
    else:
        ## Display Web form containing existing details of element:
        element_dets = get_element_details(elname)
        ## Get submission page usage of element:
        el_use = get_doctype_action_pagenb_for_submissions_using_element(elname)
        num_rows_ret = len(element_dets)
        element_dets = stringify_listvars(element_dets)
        if num_rows_ret == 1:
            ## Display Element details
            ## Take elements from results tuple:
            (elmarccode, eltype, elsize, elrows, elcols, elmaxlength, \
             elval, elfidesc, elcd, elmd, elmodifytext) = element_dets[0][0:11]
            ## Pass to template:
            body = websubmitadmin_templates.tmpl_display_addelementform(elname=elname,
                                                                        elmarccode=elmarccode,
                                                                        eltype=eltype,
                                                                        elsize=elsize,
                                                                        elrows=elrows,
                                                                        elcols=elcols,
                                                                        elmaxlength=elmaxlength,
                                                                        elval=elval,
                                                                        elfidesc=elfidesc,
                                                                        elcd=elcd,
                                                                        elmd=elmd,
                                                                        elmodifytext=elmodifytext,
                                                                        perform_act="elementedit",
                                                                        el_use_tuple=el_use
                                                                        )
        else:
            ## Either no rows, or more than one row for ELEMENT: log error, and display all Elements
            ## TODO : LOGGING
            title = "Available WebSubmit Elements"
            all_elements = get_elename_allelements()
            if num_rows_ret > 1:
                ## Key Error - duplicated elname
                user_msg.append("""Found Several Rows for Element with Name '%s' - Inform Administrator""" % (elname,))
                ## LOG MESSAGE
            else:
                ## No rows for ELEMENT
                user_msg.append("""Could Not Find Any Rows for Element with Name '%s'""" % (elname,))
                ## LOG MESSAGE
            body = websubmitadmin_templates.tmpl_display_allelements(all_elements, user_msg=user_msg)
    return (title, body)
def _display_edit_check_form(chname, user_msg=""):
    """Render the edit form for a single WebSubmit checking function, or
    fall back to the listing of all checking functions when the check
    cannot be uniquely identified.
    @param chname: unique name of the checking function to display.
    @param user_msg: any messages to be shown to the user (list).
    @return: tuple of (page title, HTML page body).
    """
    if user_msg == "":
        user_msg = []
    check_rows = get_jscheck_details(chname)
    if len(check_rows) == 1:
        ## Exactly one matching check - show its details in the edit form:
        title = "Edit WebSubmit Checking Function"
        body = websubmitadmin_templates.tmpl_display_addjscheckform(chname=check_rows[0][0],
                                                                    chdesc=check_rows[0][1],
                                                                    perform_act="jscheckedit",
                                                                    cd=check_rows[0][2],
                                                                    md=check_rows[0][3],
                                                                    user_msg=user_msg)
        return (title, body)
    ## Zero rows or duplicates: report the problem and list all checks.
    ## TODO : LOGGING
    if len(check_rows) > 1:
        ## Key Error - duplicated chname
        user_msg.append("""Found Several Rows for Checking Function with Name '%s' - Inform Administrator""" % (chname,))
        ## LOG MESSAGE
    else:
        ## No rows found for this checking function
        user_msg.append("""Could Not Find Any Rows for Checking Function with Name '%s'""" % (chname,))
        ## LOG MESSAGE
    title = "Available WebSubmit Checking Functions"
    body = websubmitadmin_templates.tmpl_display_alljschecks(get_chname_alljschecks(), user_msg=user_msg)
    return (title, body)
def perform_request_edit_jscheck(chname, chdesc=None, chcommit=""):
    """Interface for editing and updating the details of a WebSubmit Check.
    If only "chname" provided, will display the details of a Check in a Web form.
    If "chdesc" not empty, will assume that this is a call to commit update to Check details.
    @param chname: unique id for Check
    @param chdesc: modified value for WebSubmit Check description (code body) - (presence invokes update)
    @return: tuple containing "title" (title of page), body (page body).
    """
    user_msg = []
    title = "Edit WebSubmit Checking Function"
    body = ""
    ## wash args (invalid values are reduced to ""):
    if chname is None:
        chname = ""
    else:
        try:
            chname = wash_single_urlarg(urlarg=chname, argreqdtype=str, argdefault="", maxstrlen=15, minstrlen=1)
            if function_name_is_valid(fname=chname) == 0:
                chname = ""
        except ValueError:
            chname = ""
    if chdesc is None:
        chdesc = ""
    else:
        try:
            chdesc = wash_single_urlarg(urlarg=chdesc, argreqdtype=str, argdefault="")
        except ValueError:
            chdesc = ""
    (chname, chdesc) = (str(chname), str(chdesc))
    if chcommit == "" or chcommit is None:
        ## No commit requested - just show the existing details of the check:
        return _display_edit_check_form(chname=chname)
    if chname == "":
        ## Name failed washing - cannot commit; explain and list all checks:
        user_msg.append("""Check name is mandatory and must be a string with no more than 15 characters""")
        user_msg.append("""It must contain only alpha-numeric and underscore characters, beginning with a """\
                        """letter or underscore""")
        user_msg.append("""Could Not Update Checking Function""")
        title = "Available WebSubmit Checking Functions"
        body = websubmitadmin_templates.tmpl_display_alljschecks(get_chname_alljschecks(), user_msg=user_msg)
        return (title, body)
    ## Commit updated Check details to WebSubmit DB:
    if update_jscheck_details(chname, chdesc) == 0:
        ## Check Updated: Show All Check Details Again
        user_msg.append("""'%s' Check Updated""" % (chname,))
        check_row = get_jscheck_details(chname)
        body = websubmitadmin_templates.tmpl_display_addjscheckform(chname=check_row[0][0],
                                                                    chdesc=check_row[0][1],
                                                                    perform_act="jscheckedit",
                                                                    cd=check_row[0][2],
                                                                    md=check_row[0][3],
                                                                    user_msg=user_msg
                                                                    )
    else:
        ## Could Not Update Check: Maybe Key Violation, or Invalid chname? Redisplay all Checks.
        ## TODO : LOGGING
        user_msg.append("""Could Not Update Checking Function '%s'""" % (chname,))
        title = "Available WebSubmit Checking Functions"
        body = websubmitadmin_templates.tmpl_display_alljschecks(get_chname_alljschecks(), user_msg=user_msg)
    return (title, body)
def _display_edit_action_form(actid, user_msg=""):
    """Render the edit form for a single WebSubmit action, or fall back to
    the listing of all actions when the action cannot be uniquely identified.
    @param actid: unique id of the action to display.
    @param user_msg: any messages to be shown to the user (list).
    @return: tuple of (page title, HTML page body).
    """
    if user_msg == "":
        user_msg = []
    action_rows = get_action_details(actid)
    if len(action_rows) == 1:
        ## Exactly one matching action - show its details in the edit form:
        title = "Edit WebSubmit Action"
        body = websubmitadmin_templates.tmpl_display_addactionform(actid=action_rows[0][0],
                                                                   actname=action_rows[0][1],
                                                                   working_dir=action_rows[0][2],
                                                                   status_text=action_rows[0][3],
                                                                   perform_act="actionedit",
                                                                   cd=action_rows[0][4],
                                                                   md=action_rows[0][5],
                                                                   user_msg=user_msg)
        return (title, body)
    ## Zero rows or duplicates: report the problem and list all actions.
    ## TODO : LOGGING
    if len(action_rows) > 1:
        ## Key Error - duplicated actid
        user_msg.append("""Found Several Rows for Action with ID '%s' - Inform Administrator""" % (actid,))
        ## LOG MESSAGE
    else:
        ## No rows for this action
        user_msg.append("""Could Not Find Any Rows for Action with ID '%s'""" % (actid,))
        ## LOG MESSAGE
    title = "Available WebSubmit Actions"
    body = websubmitadmin_templates.tmpl_display_allactions(get_actid_actname_allactions(), user_msg=user_msg)
    return (title, body)
def perform_request_edit_action(actid, actname=None, working_dir=None, status_text=None, actcommit=""):
    """Interface for editing and updating the details of a WebSubmit action.
    If only "actid" provided, will display the details of an action in a Web form.
    If "actname" not empty, will assume that this is a call to commit update to action details.
    @param actid: unique id for action (washed to exactly 3 alphanumeric/underscore
        characters, then upper-cased; anything else becomes "")
    @param actname: modified value for WebSubmit action name/description (presence invokes update)
    @param working_dir: modified value for WebSubmit action working_dir
    @param status_text: modified value for WebSubmit action status text
    @param actcommit: if not empty, commit the (washed) values to the WebSubmit DB
    @return: tuple containing "title" (title of page), body (page body).
    """
    user_msg = []
    body = ""
    title = "Edit WebSubmit Action"
    commit_error = 0
    ## wash args (any argument that fails washing is reduced to the empty string):
    if actid is not None:
        try:
            actid = wash_single_urlarg(urlarg=actid, argreqdtype=str, argdefault="", maxstrlen=3, minstrlen=3)
            if string_is_alphanumeric_including_underscore(txtstring=actid) == 0:
                actid = ""
        except ValueError, e:
            actid = ""
        ## action ids are handled in upper case:
        actid = actid.upper()
    else:
        actid = ""
    if actname is not None:
        try:
            actname = wash_single_urlarg(urlarg=actname, argreqdtype=str, argdefault="")
        except ValueError, e:
            actname = ""
    else:
        actname = ""
    if working_dir is not None:
        try:
            working_dir = wash_single_urlarg(urlarg=working_dir, argreqdtype=str, argdefault="")
        except ValueError, e:
            working_dir = ""
    else:
        working_dir = ""
    if status_text is not None:
        try:
            status_text = wash_single_urlarg(urlarg=status_text, argreqdtype=str, argdefault="")
        except ValueError, e:
            status_text = ""
    else:
        status_text = ""
    ## process request:
    if actcommit != "" and actcommit is not None:
        ## commit requested - the action name is the only mandatory field:
        if actname in ("", None):
            actname = ""
            user_msg.append("""Action description is mandatory""")
            commit_error = 1
        if commit_error != 0:
            ## don't commit - just re-display page with message to user
            (title, body) = _display_edit_action_form(actid=actid, user_msg=user_msg)
            return (title, body)
        ## Commit updated action details to WebSubmit DB:
        err_code = update_action_details(actid, actname, working_dir, status_text)
        if err_code == 0:
            ## Action Updated: Show Action Details Again
            user_msg.append("""'%s' Action Updated""" % (actid,))
            action_dets = get_action_details(actid)
            body = websubmitadmin_templates.tmpl_display_addactionform(actid=action_dets[0][0],
                                                                       actname=action_dets[0][1],
                                                                       working_dir=action_dets[0][2],
                                                                       status_text=action_dets[0][3],
                                                                       perform_act="actionedit",
                                                                       cd=action_dets[0][4],
                                                                       md=action_dets[0][5],
                                                                       user_msg=user_msg
                                                                       )
        else:
            ## Could Not Update Action: Maybe Key Violation, or Invalid actid? Redisplay all actions.
            ## TODO : LOGGING
            all_actions = get_actid_actname_allactions()
            user_msg.append("""Could Not Update Action '%s'""" % (actid,))
            body = websubmitadmin_templates.tmpl_display_allactions(all_actions, user_msg=user_msg)
            title = "Available WebSubmit Actions"
    else:
        ## Display Web form containing existing details of action:
        (title, body) = _display_edit_action_form(actid=actid)
    return (title, body)
def _functionedit_display_function_details(funcname, user_msg=""):
    """Display the details of a function, along with any message to the user that may have been provided.
    @param funcname: unique name of function to be updated
    @param user_msg: Any message to the user that is to be displayed on the page.
    @return: tuple containing (page title, HTML page body).
    """
    if user_msg == "":
        user_msg = []
    title = "Edit WebSubmit Function"
    func_descr_res = get_function_description(function=funcname)
    num_rows_ret = len(func_descr_res)
    if num_rows_ret == 1:
        ## Exactly one row found for this function - display its details:
        funcdescr = func_descr_res[0][0]
        if funcdescr is None:
            ## description may be NULL in the DB - render as empty string:
            funcdescr = ""
        ## get parameters for this function:
        this_function_parameters = get_function_parameters(function=funcname)
        ## get all function parameters in WebSubmit:
        all_function_parameters = get_distinct_paramname_all_websubmit_function_parameters()
        ## get the docstring of the function. Remove leading empty
        ## lines and remove unnecessary leading whitespaces
        docstring = None
        try:
            ## import the module implementing this function; the fromlist
            ## argument makes __import__ return the leaf module itself:
            websubmit_function = __import__('invenio.legacy.websubmit.functions.%s' % funcname,
                                            globals(), locals(), [funcname])
            if hasattr(websubmit_function, funcname) and getattr(websubmit_function, funcname).__doc__:
                docstring = getattr(websubmit_function, funcname).__doc__
        except Exception, e:
            ## import or attribute access failed - show the error inline as HTML:
            docstring = '''<span style="color:#f00;font-weight:700">Function documentation could
            not be loaded</span>.<br/>Please check function definition. Error was:<br/>%s''' % str(e)
        if docstring:
            docstring = '<pre style="max-height:500px;overflow: auto;">' + _format_function_docstring(docstring) + '</pre>'
        body = websubmitadmin_templates.tmpl_display_addfunctionform(funcname=funcname,
                                                                     funcdescr=funcdescr,
                                                                     func_parameters=this_function_parameters,
                                                                     all_websubmit_func_parameters=all_function_parameters,
                                                                     perform_act="functionedit",
                                                                     user_msg=user_msg,
                                                                     func_docstring = docstring
                                                                     )
    else:
        ## Either no rows, or more than one row for function: log error, and display all functions
        ## TODO : LOGGING
        title = "Available WebSubmit Functions"
        all_functions = get_funcname_funcdesc_allfunctions()
        if num_rows_ret > 1:
            ## Key Error - duplicated function name
            user_msg.append("""Found Several Rows for Function with Name '%s' - Inform Administrator""" % (funcname,))
            ## LOG MESSAGE
        else:
            ## No rows for function
            user_msg.append("""Could Not Find Any Rows for Function with Name '%s'""" % (funcname,))
            ## LOG MESSAGE
        body = websubmitadmin_templates.tmpl_display_allfunctions(all_functions, user_msg=user_msg)
    return (title, body)
def _format_function_docstring(docstring):
"""
Remove unnecessary leading and trailing empty lines, as well as
meaningless leading and trailing whitespaces on every lines
@param docstring: the input docstring to format
@type docstring: string
@return: a formatted docstring
@rtype: string
"""
def count_leading_whitespaces(line):
"Count enumber of leading whitespaces"
line_length = len(line)
pos = 0
while pos < line_length and line[pos] == " ":
pos += 1
return pos
new_docstring_list = []
min_nb_leading_whitespace = len(docstring) # this is really the max possible
# First count min number of leading whitespaces of all lines. Also
# remove leading empty lines.
docstring_has_started_p = False
for line in docstring.splitlines():
if docstring_has_started_p or line.strip():
# A non-empty line has been found, or an emtpy line after
# the beginning of some text was found
docstring_has_started_p = True
new_docstring_list.append(line)
if line.strip():
# If line has some meaningful char, count leading whitespaces
line_nb_spaces = count_leading_whitespaces(line)
if line_nb_spaces < min_nb_leading_whitespace:
min_nb_leading_whitespace = line_nb_spaces
return '\n'.join([line[min_nb_leading_whitespace:] for line in new_docstring_list]).rstrip()
def _functionedit_update_description(funcname, funcdescr):
    """Perform an update of the description for a given function.
    @param funcname: unique name of function to be updated
    @param funcdescr: description to be updated for funcname
    @return: a tuple containing (page title, HTML body content)
    """
    user_msg = []
    if update_function_description(funcname, funcdescr) == 0:
        ## Function updated - redisplay
        user_msg.append("""'%s' Function Description Updated""" % (funcname,))
    else:
        ## Could not update function description
        ## TODO : ERROR LIBS
        user_msg.append("""Could Not Update Description for Function '%s'""" % (funcname,))
    ## Re-display the function's details together with the outcome message:
    return _functionedit_display_function_details(funcname=funcname, user_msg=user_msg)
def _functionedit_delete_parameter(funcname, deleteparam):
    """Delete a parameter from a given function.
    Important: if any document types have been using the function from which this parameter will be deleted,
    and therefore have values for this parameter, these values will not be deleted from the WebSubmit DB.
    The deleted parameter therefore may continue to exist in the WebSubmit DB, but will be disassociated
    from this function.
    @param funcname: unique name of the function from which the parameter is to be deleted.
    @param deleteparam: the name of the parameter to be deleted from the function.
    @return: tuple containing (title, HTML body content)
    """
    user_msg = []
    if delete_function_parameter(function=funcname, parameter_name=deleteparam) == 0:
        ## Parameter deleted - redisplay function details
        user_msg.append("""'%s' Parameter Deleted from '%s' Function""" % (deleteparam, funcname))
    else:
        ## could not delete param - it does not exist for this function
        ## TODO : ERROR LIBS
        user_msg.append("""'%s' Parameter Does not Seem to Exist for Function '%s' - Could not Delete""" \
                        % (deleteparam, funcname))
    ## Re-display the function's details together with the outcome message:
    return _functionedit_display_function_details(funcname=funcname, user_msg=user_msg)
def _functionedit_add_parameter(funcname, funceditaddparam="", funceditaddparamfree=""):
    """Add (connect) a parameter to a given WebSubmit function.
    @param funcname: unique name of the function to which the parameter is to be added.
    @param funceditaddparam: the value of a HTML select list: if present, will contain the name of the
     parameter to be added to the function. May also be empty - the user may have used the free-text field
     (funceditaddparamfree) to manually enter the name of a parameter. The important thing is that one
     must be present for the parameter to be added sucessfully.
    @param funceditaddparamfree: The name of the parameter to be added to the function, as taken from a free-
     text HTML input field. May also be empty - the user may have used the HTML select-list (funceditaddparam)
     field to choose the parameter. The important thing is that one must be present for the parameter to be
     added sucessfully. The value "funceditaddparamfree" value will take priority over the "funceditaddparam"
     list value.
    @return: tuple containing (title, HTML body content)
    """
    user_msg = []
    no_list_value = funceditaddparam in ("", None, "NO_VALUE")
    no_free_value = funceditaddparamfree in ("", None)
    if no_list_value and no_free_value:
        ## no parameter chosen
        ## TODO : ERROR LIBS
        user_msg.append("""Unable to Find the Parameter to be Added to Function '%s' - Could not Add""" % (funcname,))
    else:
        ## the free-text field takes priority over the select-list value:
        if not no_free_value:
            add_parameter = funceditaddparamfree
        else:
            add_parameter = funceditaddparam
        ## attempt to commit parameter:
        if add_function_parameter(function=funcname, parameter_name=add_parameter) == 0:
            ## Parameter added - redisplay function details
            user_msg.append("""'%s' Parameter Added to '%s' Function""" % (add_parameter, funcname))
        else:
            ## could not add param - perhaps it already exists for this function
            ## TODO : ERROR LIBS
            user_msg.append("""Could not Add '%s' Parameter to Function '%s' - It Already Exists for this Function""" \
                            % (add_parameter, funcname))
    ## Re-display the function's details together with the outcome message:
    return _functionedit_display_function_details(funcname=funcname, user_msg=user_msg)
def perform_request_edit_function(funcname, funcdescr=None, funceditaddparam=None, funceditaddparamfree=None,
                                  funceditdelparam=None, funcdescreditcommit="", funcparamdelcommit="",
                                  funcparamaddcommit=""):
    """Edit a WebSubmit function. 3 possibilities: edit the function description; delete a parameter from the
    function; add a new parameter to the function.
    @param funcname: the name of the function to be modified
    @param funcdescr: the new function description
    @param funceditaddparam: the name of the parameter to be added to the function (taken from HTML SELECT-list)
    @param funceditaddparamfree: the name of the parameter to be added to the function (taken from free-text input)
    @param funceditdelparam: the name of the parameter to be deleted from the function
    @param funcdescreditcommit: a flag to indicate that this request is to update the description of a function
    @param funcparamdelcommit: a flag to indicate that this request is to delete a parameter from a function
    @param funcparamaddcommit: a flag to indicate that this request is to add a new parameter to a function
    @return: tuple containing (page title, HTML page body)
    """
    body = ""
    title = "Edit WebSubmit Function"
    commit_error = 0  ## NOTE(review): never read in this function - kept for parity with sibling handlers
    ## wash args (any argument that fails washing is reduced to the empty string):
    if funcname is not None:
        try:
            funcname = wash_single_urlarg(urlarg=funcname, argreqdtype=str, argdefault="")
            if string_is_alphanumeric_including_underscore(txtstring=funcname) == 0:
                funcname = ""
        except ValueError, e:
            funcname = ""
    else:
        funcname = ""
    if funcdescr is not None:
        try:
            funcdescr = wash_single_urlarg(urlarg=funcdescr, argreqdtype=str, argdefault="")
        except ValueError, e:
            funcdescr = ""
    else:
        funcdescr = ""
    if funceditaddparam is not None:
        try:
            funceditaddparam = wash_single_urlarg(urlarg=funceditaddparam, argreqdtype=str, argdefault="")
            if string_is_alphanumeric_including_underscore(txtstring=funceditaddparam) == 0:
                funceditaddparam = ""
        except ValueError, e:
            funceditaddparam = ""
    else:
        funceditaddparam = ""
    if funceditaddparamfree is not None:
        try:
            funceditaddparamfree = wash_single_urlarg(urlarg=funceditaddparamfree, argreqdtype=str, argdefault="")
            if string_is_alphanumeric_including_underscore(txtstring=funceditaddparamfree) == 0:
                funceditaddparamfree = ""
        except ValueError, e:
            funceditaddparamfree = ""
    else:
        funceditaddparamfree = ""
    if funceditdelparam is not None:
        try:
            funceditdelparam = wash_single_urlarg(urlarg=funceditdelparam, argreqdtype=str, argdefault="")
        except ValueError, e:
            funceditdelparam = ""
    else:
        funceditdelparam = ""
    ## an empty/invalid function name cannot be edited - just display details (of nothing):
    if funcname == "":
        (title, body) = _functionedit_display_function_details(funcname=funcname)
        return (title, body)
    ## dispatch on which commit flag (if any) was set in the request:
    if funcdescreditcommit != "" and funcdescreditcommit is not None:
        ## Update the definition of a function:
        (title, body) = _functionedit_update_description(funcname=funcname, funcdescr=funcdescr)
    elif funcparamaddcommit != "" and funcparamaddcommit is not None:
        ## Request to add a new parameter to a function
        (title, body) = _functionedit_add_parameter(funcname=funcname,
                                                    funceditaddparam=funceditaddparam, funceditaddparamfree=funceditaddparamfree)
    elif funcparamdelcommit != "" and funcparamdelcommit is not None:
        ## Request to delete a parameter from a function
        (title, body) = _functionedit_delete_parameter(funcname=funcname, deleteparam=funceditdelparam)
    else:
        ## Display Web form for new function addition:
        (title, body) = _functionedit_display_function_details(funcname=funcname)
    return (title, body)
def perform_request_function_usage(funcname):
    """Display a page containing the usage details of a given function.
    @param funcname: the function name
    @return: page body
    """
    usage_details = stringify_listvars(get_function_usage_details(function=funcname))
    return websubmitadmin_templates.tmpl_display_function_usage(funcname, usage_details)
def perform_request_list_actions():
    """Display a list of all WebSubmit actions.
    @return: body where body is a string of HTML, which is a page body.
    """
    return websubmitadmin_templates.tmpl_display_allactions(get_actid_actname_allactions())
def perform_request_list_doctypes():
    """Display a list of all WebSubmit document types.
    @return: body where body is a string of HTML, which is a page body.
    """
    return websubmitadmin_templates.tmpl_display_alldoctypes(get_docid_docname_alldoctypes())
def perform_request_list_jschecks():
    """Display a list of all WebSubmit JavaScript element checking functions.
    @return: body, where body is a string of HTML, which is a page body.
    """
    return websubmitadmin_templates.tmpl_display_alljschecks(get_chname_alljschecks())
def perform_request_list_functions():
    """Display a list of all WebSubmit FUNCTIONS.
    @return: body where body is a string of HTML, which is a page body.
    """
    return websubmitadmin_templates.tmpl_display_allfunctions(get_funcname_funcdesc_allfunctions())
def perform_request_list_elements():
    """Display a list of all WebSubmit ELEMENTS.
    @return: body where body is a string of HTML, which is a page body.
    """
    return websubmitadmin_templates.tmpl_display_allelements(get_elename_allelements())
def _remove_doctype(doctype):
    """Process removal of a document type.

    Deletes, in order: the doctype's function-parameter values, its
    functions, its categories, its submission-interface fields, its
    submissions, its collection-doctypes entry, and finally the doctype
    record itself.  Each step is best-effort: a failure is reported to
    the user but does not abort the remaining steps.
    @param doctype: the document type to be removed.
    @return: a tuple containing page title, and HTML page body)
    """
    title = ""
    body = ""
    user_msg = []
    numrows_doctype = get_number_doctypes_docid(docid=doctype)
    if numrows_doctype == 1:
        ## Doctype is unique and can therefore be deleted:
        ## Delete any function parameters for this document type:
        error_code = delete_all_parameters_doctype(doctype=doctype)
        if error_code != 0:
            ## problem deleting some or all parameters - inform user and log error
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete some or all function parameter values for document type "%s".""" % (doctype,))
        ## delete all functions called by this doctype's actions
        error_code = delete_all_functions_doctype(doctype=doctype)
        if error_code != 0:
            ## problem deleting some or all functions - inform user and log error
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete some or all functions for document type "%s".""" % (doctype,))
        ## delete all categories of this doctype
        error_code = delete_all_categories_doctype(doctype=doctype)
        if error_code != 0:
            ## problem deleting some or all categories - inform user and log error
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete some or all parameters for document type "%s".""" % (doctype,))
        ## delete all submission interface fields for this doctype
        error_code = delete_all_submissionfields_doctype(doctype=doctype)
        if error_code != 0:
            ## problem deleting some or all submission fields - inform user and log error
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete some or all submission fields for document type "%s".""" % (doctype,))
        ## delete all submissions for this doctype
        error_code = delete_all_submissions_doctype(doctype)
        if error_code != 0:
            ## problem deleting some or all submissions - inform user and log error
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete some or all submissions for document type "%s".""" % (doctype,))
        ## delete entry for this doctype in the collection-doctypes table
        error_code = delete_collection_doctype_entry_doctype(doctype)
        if error_code != 0:
            ## problem deleting this doctype from the collection-doctypes table
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete document type "%s" from the collection-doctypes table.""" % (doctype,))
        ## delete the doctype itself
        error_code = delete_doctype(doctype)
        if error_code != 0:
            ## problem deleting this doctype from the doctypes table
            ## TODO : ERROR LOGGING
            user_msg.append("""Unable to delete document type "%s" from the document types table.""" % (doctype,))
        user_msg.append("""The "%s" document type should now have been deleted, but you should not ignore any warnings.""" % (doctype,))
        title = """Available WebSubmit Document Types"""
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
    else:
        ## doctype is not unique and cannot be deleted
        if numrows_doctype > 1:
            ## doctype is duplicated - cannot delete - needs admin intervention
            ## TODO : LOG ERROR
            user_msg.append("""%s WebSubmit document types have been identified for doctype id "%s" - unable to delete.""" \
                            """ Please inform administrator.""" % (numrows_doctype, doctype))
        else:
            ## no document types found for this doctype id
            ## TODO : LOG ERROR
            user_msg.append("""Unable to find any document types in the WebSubmit database for doctype id "%s" - unable to delete""" \
                            % (doctype,))
        ## get a list of all document types, and once more display the delete form, with the message
        alldoctypes = get_docid_docname_and_docid_alldoctypes()
        ## typo fixed: was "Remove WebSubmit Doctument Type" (now consistent
        ## with the title used by perform_request_remove_doctype):
        title = "Remove WebSubmit Document Type"
        body = websubmitadmin_templates.tmpl_display_delete_doctype_form(doctype="", alldoctypes=alldoctypes, user_msg=user_msg)
    return (title, body)
def perform_request_remove_doctype(doctype="", doctypedelete="", doctypedeleteconfirm=""):
    """Remove a document type from WebSubmit.
    @param doctype: the document type to be removed
    @param doctypedelete: flag to signal that a confirmation for deletion should be displayed
    @param doctypedeleteconfirm: flag to signal that confirmation for deletion has been received and
     the doctype should be removed
    @return: a tuple (title, body)
    """
    if doctypedeleteconfirm not in ("", None):
        ## Deletion confirmed - actually remove the document type:
        return _remove_doctype(doctype=doctype)
    ## Not yet confirmed - display the "doctype delete" form:
    title = "Remove WebSubmit Document Type"
    if doctypedelete in ("", None) or doctype in ("", None):
        ## no doctype chosen yet - fetch them all so the user can pick one to delete:
        known_doctypes = get_docid_docname_and_docid_alldoctypes()
    else:
        ## a doctype was chosen - the template will ask the user to confirm its deletion:
        known_doctypes = None
    body = websubmitadmin_templates.tmpl_display_delete_doctype_form(doctype=doctype, alldoctypes=known_doctypes)
    return (title, body)
def _create_add_doctype_form(doctype="", doctypename="", doctypedescr="", clonefrom="", user_msg=""):
    """Perform the steps necessary to create the "add a new doctype" form.
    @param doctype: The unique ID that is to be used for the new doctype.
    @param doctypename: the name that is to be given to a doctype.
    @param doctypedescr: the description to be allocated to the new doctype.
    @param clonefrom: the doctype from which the new one may be cloned.
    @param user_msg: any message to be displayed to the user.
    @return: a tuple containing page title and HTML body of page: (title, body)
    """
    ## offer every known doctype as a potential "clone from" source:
    all_doctypes = get_docid_docname_and_docid_alldoctypes()
    page_body = websubmitadmin_templates.tmpl_display_doctypedetails_form(doctype=doctype,
                                                                          doctypename=doctypename,
                                                                          doctypedescr=doctypedescr,
                                                                          clonefrom=clonefrom,
                                                                          alldoctypes=all_doctypes,
                                                                          user_msg=user_msg
                                                                         )
    return ("""Add New WebSubmit Document Type""", page_body)
def _clone_categories_doctype(user_msg, fromdoctype, todoctype):
    """Clone the categories of one document type, to another document type.
    @param user_msg: any message to be displayed to the user (this is a list)
    @param fromdoctype: the doctype from which categories are to be cloned
    @param todoctype: the doctype into which categories are to be cloned
    @return: integer value (0/1/2) - 1 when todoctype's existing categories could
     not be deleted (cloning failed); 2 when not all categories could be cloned
     (cloning at least partially successful); 0 when all categories were cloned
     (cloning successful).
     NOTE(review): the docstring previously described a 0=failed / 1=partial /
     2=success mapping, which did not match the values actually returned below.
    """
    error_code = clone_categories_fromdoctype_todoctype(fromdoctype=fromdoctype, todoctype=todoctype)
    if error_code == 1:
        ## doctype had existing categories and they could not be deleted
        ## TODO : LOG ERRORS
        user_msg.append("""Categories already existed for the document type "%s" but could not be deleted. Unable to clone""" \
                        """ categories of doctype "%s".""" % (todoctype, fromdoctype))
        return 1 ## cloning failed
    elif error_code == 2:
        ## could not clone all categories for new doctype
        ## TODO : LOG ERRORS
        user_msg.append("""Unable to clone all categories from doctype "%s", for doctype "%s".""" % (fromdoctype, todoctype))
        return 2 ## cloning at least partially successful
    else:
        return 0 ## cloning successful
def _clone_functions_foraction_doctype(user_msg, fromdoctype, todoctype, action):
    """Clone the functions of a given action of one document type, to the same action on another document type.
    @param user_msg: any message to be displayed to the user (this is a list)
    @param fromdoctype: the doctype from which functions are to be cloned
    @param todoctype: the doctype into which functions are to be cloned
    @param action: the action for which functions are to be cloned
    @return: an integer value (0/1/2): 1 when todoctype had existing functions for
     the given action and they could not be deleted (a serious, "critical" problem);
     2 when only some of the functions could be cloned (not critical); 0 when all
     functions were cloned (total success).
     NOTE(review): the docstring previously described a 0=failed / 1=partial /
     2=success mapping, which did not match the values actually returned below.
    """
    error_code = clone_functions_foraction_fromdoctype_todoctype(fromdoctype=fromdoctype, todoctype=todoctype, action=action)
    if error_code == 1:
        ## doctype had existing functions for the given action and they could not be deleted
        ## TODO : LOG ERRORS
        user_msg.append("""Functions already existed for the "%s" action of the document type "%s" but they could not be """ \
                        """deleted. Unable to clone the functions of Document Type "%s" for action "%s".""" \
                        % (action, todoctype, fromdoctype, action))
        ## critical - return 1 to signal this
        return 1
    elif error_code == 2:
        ## could not clone all functions for given action for new doctype
        ## TODO : LOG ERRORS
        user_msg.append("""Unable to clone all functions for the "%s" action from doctype "%s", for doctype "%s".""" \
                        % (action, fromdoctype, todoctype))
        return 2 ## not critical
    else:
        return 0 ## total success
def _clone_functionparameters_foraction_fromdoctype_todoctype(user_msg, fromdoctype, todoctype, action):
    """Clone the parameters/values of a given action of one document type, to the same action on another document type.
    @param user_msg: any message to be displayed to the user (this is a list)
    @param fromdoctype: the doctype from which parameters are to be cloned
    @param todoctype: the doctype into which parameters are to be cloned
    @param action: the action for which parameters are to be cloned
    @return: 2 if it was not possible to clone all parameters/values; 0 if all
     parameters/values were cloned successfully.
     NOTE(review): the docstring previously promised a 0=failure / 1=success
     mapping, which did not match the 2/0 values actually returned below.
    """
    error_code = clone_functionparameters_foraction_fromdoctype_todoctype(fromdoctype=fromdoctype, \
                                                                          todoctype=todoctype, action=action)
    if error_code in (1, 2):
        ## something went wrong and it was not possible to clone all parameters/values of "action"/"fromdoctype" for "action"/"todoctype"
        ## TODO : LOG ERRORS
        user_msg.append("""It was not possible to clone all parameter values from the action "%(act)s" of the document type""" \
                        """ "%(fromdt)s" for the action "%(act)s" of the document type "%(todt)s".""" \
                        % { 'act' : action, 'fromdt' : fromdoctype, 'todt' : todoctype }
                       )
        return 2 ## to signal that addition wasn't 100% successful
    else:
        return 0 ## all parameters were cloned
def _add_doctype(doctype, doctypename, doctypedescr, clonefrom):
    """Add a new document type to WebSubmit, optionally cloning its categories
       and submissions (with their functions, parameters and fields) from an
       existing document type.
    @param doctype: the unique ID of the new document type (mandatory)
    @param doctypename: the name to be given to the new doctype
    @param doctypedescr: the description to be given to the new doctype
    @param clonefrom: the ID of the doctype to clone from ("", "None" or None
     means the new doctype is created empty, without cloning)
    @return: a tuple containing 2 strings: (page title, page body)
    """
    title = ""
    body = ""
    user_msg = []
    commit_error = 0
    if doctype == "":
        user_msg.append("""The Document Type ID is mandatory and must be a string with no more than 10 alpha-numeric characters""")
        commit_error = 1
    if commit_error != 0:
        ## don't commit - just re-display page with message to user
        (title, body) = _create_add_doctype_form(doctypename=doctypename, doctypedescr=doctypedescr, clonefrom=clonefrom, user_msg=user_msg)
        return (title, body)
    numrows_doctype = get_number_doctypes_docid(docid=doctype)
    if numrows_doctype > 0:
        ## this document type already exists - do not add
        ## TODO : LOG ERROR
        ## (message fixed: previously read "...already seems to exist and there
        ## cannot be added." - "there" was a typo for "therefore")
        user_msg.append("""A document type identified by "%s" already seems to exist and therefore cannot be added. Choose another ID.""" \
                        % (doctype,))
        (title, body) = _create_add_doctype_form(doctypename=doctypename, doctypedescr=doctypedescr, clonefrom=clonefrom, user_msg=user_msg)
    else:
        ## proceed with addition
        ## add the document type details:
        error_code = insert_doctype_details(doctype=doctype, doctypename=doctypename, doctypedescr=doctypedescr)
        if error_code == 0:
            ## added successfully
            if clonefrom not in ("", "None", None):
                ## document type should be cloned from "clonefrom"
                ## first, clone the categories from another doctype:
                error_code = _clone_categories_doctype(user_msg=user_msg,
                                                       fromdoctype=clonefrom,
                                                       todoctype=doctype)
                ## get details of clonefrom's submissions
                all_actnames_submissions_clonefrom = get_actname_all_submissions_doctype(doctype=clonefrom)
                if len(all_actnames_submissions_clonefrom) > 0:
                    ## begin cloning
                    for doc_submission_actname in all_actnames_submissions_clonefrom:
                        ## clone submission details:
                        action_name = doc_submission_actname[0]
                        _clone_submission_fromdoctype_todoctype(user_msg=user_msg,
                                                                todoctype=doctype, action=action_name, clonefrom=clonefrom)
            user_msg.append("""The "%s" document type has been added.""" % (doctype,))
            title = """Available WebSubmit Document Types"""
            all_doctypes = get_docid_docname_alldoctypes()
            body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        else:
            ## could not add document type details - do no more
            ## TODO : LOG ERROR!
            user_msg.append("""Unable to add details for document type "%s".""" % (doctype,))
            (title, body) = _create_add_doctype_form(user_msg=user_msg)
    return (title, body)
def perform_request_add_doctype(doctype=None, doctypename=None, doctypedescr=None, clonefrom=None, doctypedetailscommit=""):
body = ""
## wash args:
if doctype is not None:
try:
doctype = wash_single_urlarg(urlarg=doctype, argreqdtype=str, argdefault="", maxstrlen=10, minstrlen=1)
if string_is_alphanumeric_including_underscore(txtstring=doctype) == 0:
doctype = ""
except ValueError, e:
doctype = ""
else:
doctype = ""
if doctypename is not None:
try:
doctypename = wash_single_urlarg(urlarg=doctypename, argreqdtype=str, argdefault="")
except ValueError, e:
doctypename = ""
else:
doctypename = ""
if doctypedescr is not None:
try:
doctypedescr = wash_single_urlarg(urlarg=doctypedescr, argreqdtype=str, argdefault="")
except ValueError, e:
doctypedescr = ""
else:
doctypedescr = ""
if clonefrom is not None:
try:
clonefrom = wash_single_urlarg(urlarg=clonefrom, argreqdtype=str, argdefault="None")
except ValueError, e:
clonefrom = "None"
else:
clonefrom = "None"
if doctypedetailscommit not in ("", None):
(title, body) = _add_doctype(doctype=doctype,
doctypename=doctypename, doctypedescr=doctypedescr, clonefrom=clonefrom)
else:
(title, body) = _create_add_doctype_form()
return (title, body)
def _delete_referee_doctype(doctype, categid, refereeid):
    """Delete a referee from a given category of a document type.
    @param doctype: the document type from whose category the referee is to be removed
    @param categid: the name/ID of the category from which the referee is to be removed
    @param refereeid: the id of the referee to be removed from the given category
    @return: a tuple containing 2 strings: (page title, page body)
    """
    ## NOTE(review): despite the docstring, this function currently returns None:
    ## it builds "user_msg" but never renders a page nor returns a (title, body)
    ## tuple, and nothing is appended when the deletion fails (error_code <= 0).
    ## It looks truncated - confirm against upstream/VCS history before use.
    user_msg = []
    ## referee roles are named "referee_<doctype>_<categid>":
    role_name = """referee_%s_%s""" % (doctype, categid)
    error_code = acc_delete_user_role(id_user=refereeid, name_role=role_name)
    if error_code > 0:
        ## referee was deleted from category
        user_msg.append(""" "%s".""" % (doctype,))
def _create_list_referees_doctype(doctype):
    """Build a dictionary describing the referees of a given document type.
    @param doctype: the document type whose referees are to be listed
    @return: a dictionary keyed by referee role name; each value is a tuple of
     (category id, category label, users of the role)
    """
    referees = {}
    referees_details = {}
    ## examine every Invenio role, keeping those "referee" roles that belong
    ## to this doctype and that actually have members:
    for role in acc_get_all_roles():
        (roleid, rolename) = (role[0], role[1])
        if not re.match("^referee_%s_" % (doctype,), rolename):
            continue
        ## this is a "referee" role - get users of this role:
        role_users = acc_get_role_users(roleid)
        if role_users is not None and (type(role_users) in (tuple, list) and len(role_users) > 0):
            ## this role has users, record them in dictionary:
            referees[rolename] = role_users
    ## for each "group" of referees:
    for ref_role in referees.keys():
        ## get category ID for this referee-role:
        try:
            categid = re.match("^referee_%s_(.*)" % (doctype,), ref_role).group(1)
        except AttributeError:
            ## there is no category for this role - it is broken, so pass it
            continue
        if categid == "*":
            ## general referee entry:
            referees_details[ref_role] = (categid, "General Referee(s)", referees[ref_role])
        else:
            ## from WebSubmit DB, get categ name for "categid":
            categ_details = get_all_categories_sname_lname_for_doctype_categsname(doctype=doctype, categsname=categid)
            if len(categ_details) > 0:
                ## if possible to receive details of this category, record them in a tuple in the format:
                ## ("categ name", (tuple of users details)):
                referees_details[ref_role] = (categid, categ_details[0][1], referees[ref_role])
    return referees_details
def _create_edit_doctype_details_form(doctype, doctypename="", doctypedescr="", doctypedetailscommit="", user_msg=""):
    """Create the form used to edit the name and description of a document type.
    @param doctype: the unique ID of the doctype whose details are to be edited
    @param doctypename: a (possibly user-submitted) name for the doctype
    @param doctypedescr: a (possibly user-submitted) description for the doctype
    @param doctypedetailscommit: non-empty when a commit was attempted, in which
     case the submitted name/description are redisplayed instead of the stored ones
    @param user_msg: any message(s) to be displayed to the user
    @return: a tuple containing 2 strings: (page title, page body)
    """
    ## normalise user_msg into a list of messages:
    if type(user_msg) in (str, unicode):
        user_msg = [user_msg] if user_msg != "" else []
    elif type(user_msg) not in (list, tuple):
        user_msg = []
    details = get_doctype_docname_descr_cd_md_fordoctype(doctype)
    if len(details) != 1:
        ## problem retrieving details of doctype:
        ## TODO : LOG ERROR
        user_msg.append("""Unable to retrieve details of doctype '%s' - cannot edit.""" % (doctype,),)
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=get_docid_docname_alldoctypes(),
                                                                 user_msg=user_msg)
        return ("Available WebSubmit Document Types", body)
    (docname, docdescr) = (details[0][1], details[0][2])
    (cd, md) = (details[0][3], details[0][4])
    if doctypedetailscommit != "":
        ## could not commit details - redisplay the values the user submitted:
        (docname, docdescr) = (doctypename, doctypedescr)
    body = websubmitadmin_templates.tmpl_display_doctypedetails_form(doctype=doctype,
                                                                    doctypename=docname,
                                                                    doctypedescr=docdescr,
                                                                    cd=cd,
                                                                    md=md,
                                                                    user_msg=user_msg,
                                                                    perform_act="doctypeconfigure")
    return ("Edit Document Type Details", body)
def _create_add_submission_choose_clonefrom_form(doctype, action, user_msg=""):
    """Create the form on which the user chooses a doctype from which to clone a submission.
    @param doctype: the doctype to which the submission is to be added
    @param action: the submission (action) to be added
    @param user_msg: any message(s) to be displayed to the user
    @return: a tuple containing 2 strings: (page title, page body)
    """
    ## normalise user_msg into a list of messages:
    if type(user_msg) in (str, unicode):
        user_msg = [user_msg] if user_msg != "" else []
    elif type(user_msg) not in (list, tuple):
        user_msg = []
    if action in ("", None):
        user_msg.append("""Unknown Submission""")
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    ## does this doctype already have this action?
    if get_number_submissions_doctype_action(doctype=doctype, action=action) >= 1:
        ## warn user that action already exists for doctype and canot be added, then display all
        ## details of doctype again
        ## TODO : LOG WARNING
        user_msg.append("The Document Type '%s' already implements the Submission '%s' - cannot add it again" \
                        % (doctype, action))
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    ## action not present for this doctype - can be added
    ## get list of all doctypes implementing this action (for possible cloning purposes)
    doctypes_implementing_action = get_doctypeid_doctypes_implementing_action(action=action)
    ## create form to display document types to clone from
    title = "Add Submission '%s' to Document Type '%s'" % (action, doctype)
    body = websubmitadmin_templates.tmpl_display_submission_clone_form(doctype=doctype,
                                                                      action=action,
                                                                      clonefrom_list=doctypes_implementing_action,
                                                                      user_msg=user_msg
                                                                     )
    return (title, body)
def _create_add_submission_form(doctype, action, displayed="", buttonorder="", statustext="",
                                level="", score="", stpage="", endtxt="", user_msg=""):
    """Create the form used to enter the details of a new submission for a doctype.
    @param doctype: the doctype to which the submission would be added
    @param action: the action implemented by the new submission
    @param displayed: displayed on main submission page? (Y/N)
    @param buttonorder: button order
    @param statustext: statustext
    @param level: level
    @param score: score
    @param stpage: stpage
    @param endtxt: endtxt
    @param user_msg: any message(s) to be displayed to the user
    @return: a tuple containing 2 strings: (page title, page body)
    """
    ## normalise user_msg into a list of messages:
    if type(user_msg) in (str, unicode):
        user_msg = [user_msg] if user_msg != "" else []
    elif type(user_msg) not in (list, tuple):
        user_msg = []
    if action in ("", None):
        ## no action identified - cannot build the form:
        user_msg.append("""Unknown Submission""")
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    title = "Add Submission '%s' to Document Type '%s'" % (action, doctype)
    body = websubmitadmin_templates.tmpl_display_submissiondetails_form(doctype=doctype,
                                                                        action=action,
                                                                        displayed=displayed,
                                                                        buttonorder=buttonorder,
                                                                        statustext=statustext,
                                                                        level=level,
                                                                        score=score,
                                                                        stpage=stpage,
                                                                        endtxt=endtxt,
                                                                        user_msg=user_msg,
                                                                        saveaction="add"
                                                                       )
    return (title, body)
def _create_delete_submission_form(doctype, action):
    """Create the confirmation form for deleting a submission from a document type.
    @param doctype: the document type from which the submission would be deleted
    @param action: the submission (action) proposed for deletion
    @return: a tuple containing 2 strings: (page title, page body)
    """
    title = """Delete Submission "%s" from Document Type "%s" """ % (action, doctype)
    if get_number_submissions_doctype_action(doctype=doctype, action=action) > 0:
        ## submission exists: create form to delete it:
        body = websubmitadmin_templates.tmpl_display_delete_doctypesubmission_form(doctype=doctype, action=action)
        return (title, body)
    ## submission doesn't seem to exist. Display details of doctype only:
    ## TODO : LOG ERRORS
    msg = """The Submission "%s" doesn't seem to exist for the Document Type "%s" - unable to delete it""" % (action, doctype)
    return _create_configure_doctype_form(doctype=doctype, user_msg=[msg])
def _create_edit_submission_form(doctype, action, user_msg=""):
    """Create the form used to edit the details of an existing submission.
    @param doctype: the document type to which the submission belongs
    @param action: the submission (action) whose details are to be edited
    @param user_msg: any message(s) to be displayed to the user
    @return: a tuple containing 2 strings: (page title, page body)
    """
    ## normalise user_msg into a list of messages:
    if type(user_msg) in (str, unicode):
        user_msg = [user_msg] if user_msg != "" else []
    elif type(user_msg) not in (list, tuple):
        user_msg = []
    subm_rows = get_submissiondetails_doctype_action(doctype=doctype, action=action)
    if len(subm_rows) == 1:
        ## correctly retrieved details of submission - display:
        row = stringify_listvars(subm_rows)[0]
        title = "Edit Details of '%s' Submission of '%s' Document Type" % (action, doctype)
        body = websubmitadmin_templates.tmpl_display_submissiondetails_form(doctype=doctype,
                                                                            action=action,
                                                                            displayed=row[3],
                                                                            buttonorder=row[7],
                                                                            statustext=row[8],
                                                                            level=row[9],
                                                                            score=row[10],
                                                                            stpage=row[11],
                                                                            endtxt=row[12],
                                                                            cd=row[5],
                                                                            md=row[6],
                                                                            user_msg=user_msg
                                                                           )
    else:
        if len(subm_rows) > 1:
            ## multiple rows for this submission - this is a key violation
            user_msg.append("Found multiple rows for the Submission '%s' of the Document Type '%s'" \
                            % (action, doctype))
        else:
            ## submission does not exist
            user_msg.append("The Submission '%s' of the Document Type '%s' doesn't seem to exist." \
                            % (action, doctype))
        ## TODO : LOG ERROR
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    return (title, body)
def _create_edit_category_form(doctype, categid):
    """Create the form for editing the description of one of a doctype's categories.
    @param doctype: the document type to which the category belongs
    @param categid: the short name/ID of the category to be edited
    @return: a tuple containing 2 strings: (page title, page body)
    """
    title = "Edit Category Description"
    categ_rows = get_all_categories_sname_lname_for_doctype_categsname(doctype=doctype, categsname=categid)
    if len(categ_rows) != 1:
        ## problem retrieving details of categ
        ## TODO : LOG ERRORS
        user_msg = """Unable to retrieve details of category '%s'""" % (categid,)
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    ## display details:
    body = websubmitadmin_templates.tmpl_display_edit_category_form(doctype=doctype,
                                                                    categid=categ_rows[0][0],
                                                                    categdescr=categ_rows[0][1]
                                                                   )
    return (title, body)
def _create_configure_doctype_form(doctype, jumpcategout="", user_msg=""):
    """Create the main "configure document type" overview page, showing the
       doctype's details, categories, submissions, referees and the actions
       that could still be added to it.
    @param doctype: the unique ID of the document type to be configured
    @param jumpcategout: the category (if any) currently marked for a "jump" move
    @param user_msg: any message(s) to be displayed to the user (string or list)
    @return: a tuple containing 2 strings: (page title, page body)
    """
    title = "Configure Document Type"
    body = ""
    ## normalise user_msg into a list, as the sibling *_form helpers in this
    ## module do (previously a non-empty string was passed through to the
    ## template unwrapped, unlike in every other form-building function here):
    if user_msg == "" or type(user_msg) not in (list, tuple, str, unicode):
        user_msg = []
    elif type(user_msg) in (str, unicode):
        user_msg = [user_msg]
    ## get details of doctype:
    doctype_details = get_doctype_docname_descr_cd_md_fordoctype(doctype)
    docname = doctype_details[0][1]
    docdescr = doctype_details[0][2]
    (cd, md) = (doctype_details[0][3], doctype_details[0][4])
    ## get categories for doctype:
    doctype_categs = get_all_category_details_for_doctype(doctype=doctype)
    ## get submissions for doctype:
    doctype_submissions = get_submissiondetails_all_submissions_doctype(doctype=doctype)
    ## get list of actions that this doctype doesn't have:
    unlinked_actions = get_actions_sname_lname_not_linked_to_doctype(doctype=doctype)
    ## get referees for doctype:
    referees_dets = _create_list_referees_doctype(doctype=doctype)
    body = websubmitadmin_templates.tmpl_configure_doctype_overview(doctype=doctype, doctypename=docname,
                                                                    doctypedescr=docdescr, doctype_cdate=cd,
                                                                    doctype_mdate=md, doctype_categories=doctype_categs,
                                                                    jumpcategout=jumpcategout,
                                                                    doctype_submissions=doctype_submissions,
                                                                    doctype_referees=referees_dets,
                                                                    add_actions_list=unlinked_actions,
                                                                    user_msg=user_msg)
    return (title, body)
def _clone_submission_fromdoctype_todoctype(user_msg, todoctype, action, clonefrom):
    """Clone a submission (action) - its details, functions, function parameters
       and submission fields - from one document type onto another.
    @param user_msg: list to which warning/error messages for the user are appended
    @param todoctype: the doctype into which the submission is to be cloned
    @param action: the action (submission) to be cloned
    @param clonefrom: the doctype from which the submission is cloned
    @return: None. The outcome is reported to the user only via "user_msg".
    """
    ## first, delete the submission from todoctype (if it exists):
    error_code = delete_submissiondetails_doctype(doctype=todoctype, action=action)
    if error_code == 0:
        ## could be deleted - now clone it
        error_code = insert_submission_details_clonefrom_submission(addtodoctype=todoctype, action=action, clonefromdoctype=clonefrom)
        if error_code == 0:
            ## submission inserted
            ## now clone functions:
            error_code = _clone_functions_foraction_doctype(user_msg=user_msg, \
                                                            fromdoctype=clonefrom, todoctype=todoctype, action=action)
            if error_code in (0, 2):
                ## no serious error - clone parameters:
                error_code = _clone_functionparameters_foraction_fromdoctype_todoctype(user_msg=user_msg,
                                                                                       fromdoctype=clonefrom,
                                                                                       todoctype=todoctype,
                                                                                       action=action)
                ## now clone pages/elements
                ## NOTE(review): the result of the parameter-cloning call above is
                ## overwritten here without being checked - presumably a deliberate
                ## best-effort (warnings were already pushed to user_msg); confirm.
                error_code = clone_submissionfields_from_doctypesubmission_to_doctypesubmission(fromsub="%s%s" % (action, clonefrom),
                                                                                               tosub="%s%s" % (action, todoctype))
                if error_code == 1:
                    ## could not delete all existing submission fields and therefore could no clone submission fields at all
                    ## TODO : LOG ERROR
                    user_msg.append("""Unable to delete existing submission fields for Submission "%s" of Document Type "%s" - """ \
                                    """cannot clone submission fields!""" % (action, todoctype))
                elif error_code == 2:
                    ## could not clone all fields
                    ## TODO : LOG ERROR
                    user_msg.append("""Unable to clone all submission fields for submission "%s" on Document Type "%s" from Document""" \
                                    """ Type "%s" """ % (action, todoctype, clonefrom))
        else:
            ## could not insert submission details!
            user_msg.append("""Unable to successfully insert details of submission "%s" into Document Type "%s" - cannot clone from "%s" """ \
                            % (action, todoctype, clonefrom))
            ## TODO : LOG ERROR
    else:
        ## could not delete details of existing submission (action) from 'todoctype' - cannot clone it as new
        user_msg.append("""Unable to delete details of existing Submission "%s" from Document Type "%s" - cannot clone it from "%s" """ \
                        % (action, todoctype, clonefrom))
        ## TODO : LOG ERROR
def _add_submission_to_doctype_clone(doctype, action, clonefrom):
    """Add a submission to a document type by cloning it from another document type.
    @param doctype: the doctype to which the submission is to be added
    @param action: the submission (action) to be added
    @param clonefrom: the doctype from which the submission is to be cloned
    @return: a tuple containing 2 strings: (page title, page body)
    """
    user_msg = []
    if action in ("", None) or clonefrom in ("", None):
        user_msg.append("Unknown action or document type to clone from - cannot add submission")
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    ## does action exist?
    if get_number_actions_with_actid(actid=action) < 1:
        ## this action doesn't exist! cannot add a submission based upon it!
        ## TODO : LOG ERROR
        user_msg.append("The Action '%s' does not seem to exist in WebSubmit. Cannot add it as a Submission!" \
                        % (action))
    elif get_number_submissions_doctype_action(doctype=doctype, action=action) > 0:
        ## this submission already exists for this document type - unable to add it again
        ## TODO : LOG ERROR
        user_msg.append("""The Submission "%s" already exists for Document Type "%s" - cannot add it again""" \
                        %(action, doctype))
    else:
        ## clone the submission
        _clone_submission_fromdoctype_todoctype(user_msg=user_msg,
                                                todoctype=doctype, action=action, clonefrom=clonefrom)
        user_msg.append("""Cloning of Submission "%s" from Document Type "%s" has been carried out. You should not""" \
                        """ ignore any warnings that you may have seen.""" % (action, clonefrom))
        ## TODO : LOG WARNING OF NEW SUBMISSION CREATION BY CLONING
    return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
def _add_submission_to_doctype(doctype, action, displayed, buttonorder,
                               statustext, level, score, stpage, endtxt):
    """Attach a new submission (action) to a document type.
    @param doctype: the doctype to which the submission is to be added
    @param action: the action implemented by the new submission
    @param displayed: displayed on main submission page? (Y/N)
    @param buttonorder: button order
    @param statustext: statustext
    @param level: level
    @param score: score
    @param stpage: stpage
    @param endtxt: endtxt
    @return: a tuple of 2 strings: (page title, page body)
    """
    user_msg = []
    ## does "action" exist?
    if get_number_actions_with_actid(actid=action) < 1:
        ## this action does not exist! Can't add a submission based upon it!
        ## TODO : LOG ERROR
        user_msg.append("'%s' does not exist in WebSubmit as an Action! Unable to add this submission."\
                        % (action,))
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    ## Insert the new submission
    error_code = insert_submission_details(doctype=doctype, action=action, displayed=displayed,
                                           nbpg="0", buttonorder=buttonorder, statustext=statustext,
                                           level=level, score=score, stpage=stpage, endtext=endtxt)
    if error_code == 0:
        ## successful insert
        user_msg.append("""'%s' Submission Successfully Added to Document Type '%s'""" % (action, doctype))
    else:
        ## could not insert submission into doctype
        ## TODO : LOG ERROR
        user_msg.append("""Unable to Add '%s' Submission to '%s' Document Type""" % (action, doctype))
    return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
def _delete_submission_from_doctype(doctype, action):
    """Delete a submission (action) from the document type identified by "doctype".
    All of the submission's fields, function parameters and functions are deleted
    first, then the submission itself.
    @param doctype: the unique ID of the document type from which the submission is to be deleted
    @param action: the action ID of the submission to be deleted from doctype
     (NOTE(review): this parameter was previously mis-documented as "categid")
    @return: a tuple containing 2 strings: (page title, page body)
    """
    user_msg = []
    if action in ("", None):
        user_msg.append("Unknown action - cannot delete submission")
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
        return (title, body)
    ## delete fields for this submission:
    error_code = delete_all_submissionfields_submission("""%s%s""" % (action, doctype) )
    if error_code != 0:
        ## could not successfully delete all fields - report error
        user_msg.append("""When deleting Submission "%s" from Document Type "%s", it wasn't possible to delete all Submission Fields""" \
                        % (action, doctype))
        ## TODO : LOG ERROR
    ## delete parameters for this submission:
    error_code = delete_functionparameters_doctype_submission(doctype=doctype, action=action)
    if error_code != 0:
        ## could not successfully delete all function parameters - report error
        user_msg.append("""When deleting Submission "%s" from Document Type "%s", it wasn't possible to delete all Function Parameters""" \
                        % (action, doctype))
        ## TODO : LOG ERROR
    ## delete functions for this submission:
    error_code = delete_all_functions_foraction_doctype(doctype=doctype, action=action)
    if error_code != 0:
        ## could not successfully delete all functions - report error
        user_msg.append("""When deleting Submission "%s" from Document Type "%s", it wasn't possible to delete all Functions""" \
                        % (action, doctype))
        ## TODO : LOG ERROR
    ## delete this submission itself:
    error_code = delete_submissiondetails_doctype(doctype=doctype, action=action)
    if error_code == 0:
        ## successful delete
        user_msg.append("""The "%s" Submission has been deleted from the "%s" Document Type""" % (action, doctype))
    else:
        ## could not delete submission
        user_msg.append("""Unable to successfully delete the "%s" Submission from the "%s" Document Type""" % (action, doctype))
        ## TODO : LOG ERROR
    (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    return (title, body)
def _edit_submission_for_doctype(doctype, action, displayed, buttonorder,
                                 statustext, level, score, stpage, endtxt):
    """Update the details of a given submission belonging to the document type identified by "doctype".
    @param doctype: the unique ID of the document type for which the submission is to be updated
    @param action: action name of the submission to be updated
    @param displayed: displayed on main submission page? (Y/N)
    @param buttonorder: button order
    @param statustext: statustext
    @param level: level
    @param score: score
    @param stpage: stpage
    @param endtxt: endtxt
    @return: a tuple of 2 strings: (page title, page body)
    """
    if action in ("", None):
        ## no submission identified - nothing to update:
        return _create_configure_doctype_form(doctype=doctype,
                                              user_msg=["Unknown Action - cannot update submission"])
    outcome = update_submissiondetails_doctype_action(doctype=doctype, action=action, displayed=displayed,
                                                      buttonorder=buttonorder, statustext=statustext, level=level,
                                                      score=score, stpage=stpage, endtxt=endtxt)
    if outcome == 0:
        ## successful update
        msg = "'%s' Submission of '%s' Document Type updated." % (action, doctype)
    else:
        ## could not update
        ## TODO : LOG ERROR
        msg = "Unable to update '%s' Submission of '%s' Document Type." % (action, doctype)
    return _create_configure_doctype_form(doctype=doctype, user_msg=[msg])
def _edit_doctype_details(doctype, doctypename, doctypedescr):
    """Update the details (name and/or description) of a document type (identified by doctype.)
    @param doctype: the unique ID of the document type to be updated
    @param doctypename: the new/updated name for the doctype
    @param doctypedescr: the new/updated description for the doctype
    @return: a tuple containing 2 strings: (page title, page body)
    """
    outcome = update_doctype_details(doctype=doctype, doctypename=doctypename, doctypedescr=doctypedescr)
    if outcome == 0:
        ## successful update
        msg = """'%s' Document Type Updated""" % (doctype,)
    else:
        ## could not update
        ## TODO : LOG ERROR
        msg = """Unable to Update Doctype '%s'""" % (doctype,)
    return _create_configure_doctype_form(doctype=doctype, user_msg=[msg])
def _edit_category_for_doctype(doctype, categid, categdescr):
    """Edit the description of a given category (identified by categid), belonging to
       the document type identified by doctype.
    @param doctype: the unique ID of the document type for which the category is to be modified
    @param categid: the unique category ID of the category to be modified
    @param categdescr: the new description for the category
    @return: a tuple containing 2 strings: (page title, page body)
    """
    user_msg = []
    if categid in ("", None) or categdescr in ("", None):
        ## cannot edit unknown category!
        user_msg.append("Category ID and Description are both mandatory")
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
        return (title, body)
    error_code = update_category_description_doctype_categ(doctype=doctype, categ=categid, categdescr=categdescr)
    if error_code == 0:
        ## successful update
        user_msg.append("""'%s' Category Description Successfully Updated""" % (categid,))
    else:
        ## could not update category description
        ## TODO : LOG ERROR
        ## (message fixed: previously read "Unable to Description for Category",
        ## which was missing the verb)
        user_msg.append("""Unable to Update Description for Category '%s'""" % (categid,))
    (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    return (title, body)
def _add_category_to_doctype(doctype, categid, categdescr):
    """Add a new category to the document type identified by "doctype".
    Category ID, and category description are both mandatory.
    @param doctype: the unique ID of the document type to which the category is to be added
    @param categid: the unique category ID of the category to be added to doctype
    @param categdescr: the description of the category to be added
    @return: a tuple containing 2 strings: (page title, page body)
    """
    if categid in ("", None) or categdescr in ("", None):
        ## cannot add unknown category!
        return _create_configure_doctype_form(doctype=doctype,
                                              user_msg=["Category ID and Description are both mandatory"])
    if insert_category_into_doctype(doctype=doctype, categ=categid, categdescr=categdescr) == 0:
        ## successful insert
        msg = """'%s' Category Successfully Added""" % (categid,)
    else:
        ## could not insert category into doctype
        ## TODO : LOG ERROR
        msg = """Unable to Add '%s' Category""" % (categid,)
    return _create_configure_doctype_form(doctype=doctype, user_msg=[msg])
def _delete_category_from_doctype(doctype, categid):
    """Delete a category (categid) from the document type identified by "doctype".
    @param doctype: the unique ID of the document type from which the category is to be deleted
    @param categid: the unique category ID of the category to be deleted from doctype
    @return: a tuple containing 2 strings: (page title, page body)
    """
    if categid in ("", None):
        ## cannot delete unknown category!
        return _create_configure_doctype_form(doctype=doctype,
                                              user_msg=["Category ID is mandatory"])
    if delete_category_doctype(doctype=doctype, categ=categid) == 0:
        ## successful delete
        msg = """'%s' Category Successfully Deleted""" % (categid,)
    else:
        ## could not delete category
        ## TODO : LOG ERROR
        msg = """Unable to Delete '%s' Category""" % (categid,)
    return _create_configure_doctype_form(doctype=doctype, user_msg=[msg])
def _jump_category_to_new_score(doctype, jumpcategout, jumpcategin):
    """Move ("jump") a category of a document type to the score position of another category.
    @param doctype: the doctype whose category ordering is to be changed
    @param jumpcategout: the category to be moved
    @param jumpcategin: the category at whose score position it should be placed
    @return: a tuple containing 2 strings: (page title, page body)
    """
    if jumpcategout in ("", None) or jumpcategin in ("", None):
        ## need both jumpcategout and jumpcategin to move a category:
        return _create_configure_doctype_form(doctype=doctype,
                                              user_msg=["Unable to move category - unknown source and/or destination score(s)"])
    ## FIXME TODO:
    error_code = move_category_to_new_score(doctype, jumpcategout, jumpcategin)
    if error_code == 0:
        ## successful jump of category
        msg = """Successfully Moved [%s] Category""" % (jumpcategout,)
    else:
        ## could not move category
        ## TODO : LOG ERROR
        msg = """Unable to Move [%s] Category""" % (jumpcategout,)
    return _create_configure_doctype_form(doctype=doctype, user_msg=[msg])
def _move_category(doctype, categid, movecategup=""):
    """Move a category one place up or down in score for a document type.

    @param doctype: the unique ID of the document type whose category is moved
    @param categid: the ID of the category to be moved
    @param movecategup: when non-empty, move the category upwards in score;
        otherwise move it downwards
    @return: a tuple containing 2 strings: (page title, page body)
    """
    messages = []
    if categid in ("", None):
        ## a category ID is mandatory:
        messages.append("Cannot move an unknown category - category ID is mandatory")
        return _create_configure_doctype_form(doctype=doctype, user_msg=messages)
    ## a single call with the computed direction replaces the two duplicated
    ## up/down call sites:
    direction = "up" if movecategup not in ("", None) else "down"
    error_code = move_category_by_one_place_in_score(doctype=doctype,
                                                     categsname=categid,
                                                     direction=direction)
    if error_code == 0:
        ## the category was moved as requested:
        messages.append("""[%s] Category Successfully Moved""" % (categid,))
    else:
        ## the move failed:
        ## TODO : LOG ERROR
        messages.append("""Unable to Move [%s] Category""" % (categid,))
    return _create_configure_doctype_form(doctype=doctype, user_msg=messages)
def _wash_optional_urlarg(value, fallback="", argreqdtype=str, argdefault="",
                          require_alphanum=False, **extra_wash_args):
    """Wash one optional URL argument, collapsing every failure to a fallback.

    @param value: the raw argument value (possibly None)
    @param fallback: value returned when washing is impossible or fails
    @param argreqdtype: required type passed through to wash_single_urlarg
    @param argdefault: default passed through to wash_single_urlarg
    @param require_alphanum: when True, additionally require the washed value
        to be alphanumeric (underscores allowed), else return fallback
    @param extra_wash_args: optional maxstrlen/minstrlen forwarded verbatim to
        wash_single_urlarg (omitted entirely when the caller did not give them,
        preserving wash_single_urlarg's own defaults)
    @return: the washed value, or fallback
    """
    if value is None:
        return fallback
    try:
        washed = wash_single_urlarg(urlarg=value, argreqdtype=argreqdtype,
                                    argdefault=argdefault, **extra_wash_args)
    except ValueError:
        return fallback
    if require_alphanum and \
       string_is_alphanumeric_including_underscore(txtstring=washed) == 0:
        return fallback
    return washed

def perform_request_configure_doctype(doctype,
                                      doctypename=None,
                                      doctypedescr=None,
                                      doctypedetailsedit="",
                                      doctypedetailscommit="",
                                      doctypecategoryadd="",
                                      doctypecategoryedit="",
                                      doctypecategoryeditcommit="",
                                      doctypecategorydelete="",
                                      doctypesubmissionadd="",
                                      doctypesubmissiondelete="",
                                      doctypesubmissiondeleteconfirm="",
                                      doctypesubmissionedit="",
                                      doctypesubmissionaddclonechosen="",
                                      doctypesubmissionadddetailscommit="",
                                      doctypesubmissioneditdetailscommit="",
                                      categid=None,
                                      categdescr=None,
                                      movecategup=None,
                                      movecategdown=None,
                                      jumpcategout=None,
                                      jumpcategin=None,
                                      action=None,
                                      doctype_cloneactionfrom=None,
                                      displayed=None,
                                      buttonorder=None,
                                      statustext=None,
                                      level=None,
                                      score=None,
                                      stpage=None,
                                      endtxt=None
                                      ):
    """Dispatch a "configure document type" admin request.

    Washes all URL arguments, verifies that the doctype ID identifies exactly
    one document type, then dispatches to the sub-handler selected by whichever
    mode flag (doctypedetailsedit, doctypecategoryadd, ...) is set.

    @param doctype: (string) the unique ID of the document type to configure
    @return: a tuple containing 2 strings: (page title, page body)
    """
    user_msg = []
    ## wash the short alphanumeric identifiers:
    doctype = _wash_optional_urlarg(doctype, require_alphanum=True,
                                    maxstrlen=10, minstrlen=1)
    action = _wash_optional_urlarg(action, require_alphanum=True,
                                   maxstrlen=3, minstrlen=1)
    doctype_cloneactionfrom = _wash_optional_urlarg(doctype_cloneactionfrom,
                                                    require_alphanum=True,
                                                    maxstrlen=10, minstrlen=1)
    ## wash the free-text string arguments:
    doctypename = _wash_optional_urlarg(doctypename)
    doctypedescr = _wash_optional_urlarg(doctypedescr)
    categid = _wash_optional_urlarg(categid)
    categdescr = _wash_optional_urlarg(categdescr)
    statustext = _wash_optional_urlarg(statustext)
    endtxt = _wash_optional_urlarg(endtxt)
    ## single-character / integer arguments:
    displayed = _wash_optional_urlarg(displayed, fallback="Y", argdefault="Y",
                                      maxstrlen=1, minstrlen=1)
    level = _wash_optional_urlarg(level, maxstrlen=1, minstrlen=1)
    buttonorder = _wash_optional_urlarg(buttonorder, argreqdtype=int)
    score = _wash_optional_urlarg(score, argreqdtype=int)
    stpage = _wash_optional_urlarg(stpage, argreqdtype=int)
    ## ensure that there is only one doctype for this doctype ID - simply display all doctypes with warning if not
    numrows_doctype = get_number_doctypes_docid(docid=doctype)
    if numrows_doctype > 1:
        ## there are multiple doctypes with this doctype ID:
        ## TODO : LOG ERROR
        user_msg.append("""Multiple document types identified by "%s" exist - cannot configure at this time.""" \
                        % (doctype,))
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    elif numrows_doctype == 0:
        ## this doctype does not seem to exist:
        user_msg.append("""The document type identified by "%s" doesn't exist - cannot configure at this time.""" \
                        % (doctype,))
        ## TODO : LOG ERROR
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    ## since doctype ID is OK, process doctype configuration request:
    if doctypedetailsedit not in ("", None):
        (title, body) = _create_edit_doctype_details_form(doctype=doctype)
    elif doctypedetailscommit not in ("", None):
        ## commit updated document type details
        (title, body) = _edit_doctype_details(doctype=doctype,
                                              doctypename=doctypename, doctypedescr=doctypedescr)
    elif doctypecategoryadd not in ("", None):
        ## add new category:
        (title, body) = _add_category_to_doctype(doctype=doctype, categid=categid, categdescr=categdescr)
    elif doctypecategoryedit not in ("", None):
        ## create form to update category description:
        (title, body) = _create_edit_category_form(doctype=doctype,
                                                   categid=categid)
    elif doctypecategoryeditcommit not in ("", None):
        ## commit updated category description:
        (title, body) = _edit_category_for_doctype(doctype=doctype, categid=categid, categdescr=categdescr)
    elif doctypecategorydelete not in ("", None):
        ## delete a category
        (title, body) = _delete_category_from_doctype(doctype=doctype, categid=categid)
    elif movecategup not in ("", None) or movecategdown not in ("", None):
        ## move a category up or down in score:
        (title, body) = _move_category(doctype=doctype, categid=categid,
                                       movecategup=movecategup)
    elif jumpcategout not in ("", None) and jumpcategin not in ("", None):
        ## jump a category from one score to another:
        (title, body) = _jump_category_to_new_score(doctype=doctype, jumpcategout=jumpcategout,
                                                    jumpcategin=jumpcategin)
    elif doctypesubmissionadd not in ("", None):
        ## form displaying option of adding doctype:
        (title, body) = _create_add_submission_choose_clonefrom_form(doctype=doctype, action=action)
    elif doctypesubmissionaddclonechosen not in ("", None):
        ## add a submission. if there is a document type to be cloned from, then process clone;
        ## otherwise, present form with details of doctype
        if doctype_cloneactionfrom in ("", None, "None"):
            ## no clone - present form into which details of new submission should be entered
            (title, body) = _create_add_submission_form(doctype=doctype, action=action)
        else:
            ## new submission should be cloned from doctype_cloneactionfrom
            (title, body) = _add_submission_to_doctype_clone(doctype=doctype, action=action, clonefrom=doctype_cloneactionfrom)
    elif doctypesubmissiondelete not in ("", None):
        ## create form to prompt for confirmation of deletion of a submission:
        (title, body) = _create_delete_submission_form(doctype=doctype, action=action)
    elif doctypesubmissiondeleteconfirm not in ("", None):
        ## process the deletion of a submission from the doctype concerned:
        (title, body) = _delete_submission_from_doctype(doctype=doctype, action=action)
    elif doctypesubmissionedit not in ("", None):
        ## create form to update details of a submission
        (title, body) = _create_edit_submission_form(doctype=doctype, action=action)
    elif doctypesubmissioneditdetailscommit not in ("", None):
        ## commit updated submission details:
        (title, body) = _edit_submission_for_doctype(doctype=doctype, action=action,
                                                     displayed=displayed, buttonorder=buttonorder, statustext=statustext,
                                                     level=level, score=score, stpage=stpage, endtxt=endtxt)
    elif doctypesubmissionadddetailscommit not in ("", None):
        ## commit new submission to doctype (not by cloning)
        (title, body) = _add_submission_to_doctype(doctype=doctype, action=action,
                                                   displayed=displayed, buttonorder=buttonorder, statustext=statustext,
                                                   level=level, score=score, stpage=stpage, endtxt=endtxt)
    else:
        ## default - display root of edit doctype
        (title, body) = _create_configure_doctype_form(doctype=doctype, jumpcategout=jumpcategout)
    return (title, body)
def _create_configure_doctype_submission_functions_form(doctype,
                                                        action,
                                                        movefromfunctionname="",
                                                        movefromfunctionstep="",
                                                        movefromfunctionscore="",
                                                        user_msg=""):
    """Display the functions belonging to a given submission of a doctype.

    @param doctype: (string) the unique ID of a document type
    @param action: (string) the unique ID of an action
    @param movefromfunctionname: (string) name of a function marked for moving
        (passed through to the template)
    @param movefromfunctionstep: step of the function marked for moving
    @param movefromfunctionscore: score of the function marked for moving
    @param user_msg: (string or list of strings) message(s) shown to the user
    @return: (tuple) containing 2 strings - (page-title, HTML page-body)
    """
    page_title = """Functions of the "%s" Submission of the "%s" Document Type:""" % (action, doctype)
    current_functions = get_functionname_step_score_allfunctions_doctypesubmission(doctype=doctype, action=action)
    page_body = websubmitadmin_templates.tmpl_configuredoctype_display_submissionfunctions(doctype=doctype,
                                                                                           action=action,
                                                                                           movefromfunctionname=movefromfunctionname,
                                                                                           movefromfunctionstep=movefromfunctionstep,
                                                                                           movefromfunctionscore=movefromfunctionscore,
                                                                                           submissionfunctions=current_functions,
                                                                                           user_msg=user_msg)
    return (page_title, page_body)
def _create_configure_doctype_submission_functions_add_function_form(doctype, action, addfunctionname="",
                                                                     addfunctionstep="", addfunctionscore="", user_msg=""):
    """Create a form that allows a user to add a function to a submission.

    @param doctype: (string) the unique ID of a document type
    @param action: (string) the unique ID of an action
    @param addfunctionname: (string) the name of the function to be added to the
        submission (passed in case of page refresh)
    @param addfunctionstep: (integer) the step of the submission into which the
        function is to be added (passed in case of page refresh)
    @param addfunctionscore: (integer) the score at which the function is to be
        added (passed in case of page refresh)
    @param user_msg: (string or list of strings) any message(s) to be displayed
        to the user
    @return: (tuple) containing 2 strings - (page-title, HTML page-body)
    """
    title = """Add a function to the [%s] submission of the [%s] document type""" % (action, doctype)
    current_functions = get_functionname_step_score_allfunctions_doctypesubmission(doctype=doctype, action=action)
    ## every WebSubmit function name, wrapped as a single-element tuple so the
    ## template can build an HTML select list from it:
    function_choices = [(str(funcname),) for funcname in get_names_of_all_functions()]
    body = websubmitadmin_templates.tmpl_configuredoctype_add_submissionfunction(doctype=doctype,
                                                                                 action=action,
                                                                                 cursubmissionfunctions=current_functions,
                                                                                 allWSfunctions=function_choices,
                                                                                 addfunctionname=addfunctionname,
                                                                                 addfunctionstep=addfunctionstep,
                                                                                 addfunctionscore=addfunctionscore,
                                                                                 user_msg=user_msg)
    return (title, body)
def _create_configure_doctype_submission_functions_list_parameters_form(doctype,
                                                                        action,
                                                                        functionname,
                                                                        user_msg=""):
    """List the parameters (and their doctype-specific values) of a function.

    @param doctype: (string) the unique ID of a document type
    @param action: (string) the unique ID of an action
    @param functionname: (string) the name of the function whose parameters are listed
    @param user_msg: (string or list of strings) message(s) shown to the user
    @return: (tuple) containing 2 strings - (page-title, HTML page-body)
    """
    title = """Parameters of the %s function, as used in the %s document type"""\
            % (functionname, doctype)
    funcparams = get_function_parameters(function=functionname)
    if funcparams:
        ## look up this doctype's value for each parameter name:
        param_names = [str(row[0]) for row in funcparams]
        params = get_function_parameter_vals_doctype(doctype=doctype, paramlist=param_names)
    else:
        params = ()
    body = websubmitadmin_templates.tmpl_configuredoctype_list_functionparameters(doctype=doctype,
                                                                                  action=action,
                                                                                  function=functionname,
                                                                                  params=params,
                                                                                  user_msg=user_msg)
    return (title, body)
def _update_submission_function_parameter_file(doctype, action, functionname,
paramname, paramfilecontent):
user_msg = []
## get the filename:
paramval_res = get_value_of_parameter_for_doctype(doctype=doctype, parameter=paramname)
if paramval_res is None:
## this parameter doesn't exist for this doctype!
user_msg.append("The parameter [%s] doesn't exist for the document type [%s]!" % (paramname, doctype))
(title, body) = _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
action=action,
functionname=functionname,
user_msg=user_msg)
return (title, body)
paramval = str(paramval_res)
filename = basename(paramval)
if filename == "":
## invalid filename
user_msg.append("[%s] is an invalid filename - cannot save details" % (paramval,))
(title, body) = _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
action=action,
functionname=functionname,
user_msg=user_msg)
return (title, body)
## save file:
try:
save_update_to_file(filepath="%s/%s" % (CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, filename), filecontent=paramfilecontent)
except InvenioWebSubmitAdminWarningIOError, e:
## could not correctly update the file!
user_msg.append(str(e))
(title, body) = _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
action=action,
functionname=functionname,
user_msg=user_msg)
return (title, body)
## redisplay form
user_msg.append("""[%s] file updated""" % (filename,))
(title, body) = _create_configure_doctype_submission_functions_edit_parameter_file_form(doctype=doctype,
action=action,
functionname=functionname,
paramname=paramname,
user_msg=user_msg)
return (title, body)
def _create_configure_doctype_submission_functions_edit_parameter_file_form(doctype,
                                                                            action,
                                                                            functionname,
                                                                            paramname,
                                                                            user_msg=""):
    """Display a form for editing the contents of the file whose name is held
    by a function parameter's value for a given document type.

    @param doctype: (string) the unique ID of a document type
    @param action: (string) the unique ID of an action
    @param functionname: (string) the function to which the parameter belongs
    @param paramname: (string) the parameter whose value names the file
    @param user_msg: (string or list of strings) message(s) shown to the user
    @return: (tuple) containing 2 strings - (page-title, HTML page-body)
    """
    if not isinstance(user_msg, list):
        ## normalise to a list; keep a non-empty string message instead of
        ## silently discarding it as the previous code did:
        user_msg = [user_msg] if user_msg not in ("", None) else []
    paramval_res = get_value_of_parameter_for_doctype(doctype=doctype, parameter=paramname)
    if paramval_res is None:
        ## this parameter doesn't exist for this doctype!
        user_msg.append("The parameter [%s] doesn't exist for the document type [%s]!" % (paramname, doctype))
        ## forward the message to the listing page (it used to be dropped):
        return _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
                                                                                   action=action,
                                                                                   functionname=functionname,
                                                                                   user_msg=user_msg)
    paramval = str(paramval_res)
    title = "Edit the [%s] file for the [%s] document type" % (paramval, doctype)
    filecontent = ""
    filename = basename(paramval)
    if filename == "":
        ## invalid filename
        user_msg.append("[%s] is an invalid filename" % (paramval,))
        return _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
                                                                                   action=action,
                                                                                   functionname=functionname,
                                                                                   user_msg=user_msg)
    ## try to read file contents:
    filepath = "%s/%s" % (CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, filename)
    if access(filepath, F_OK):
        ## file exists
        if access(filepath, R_OK) and isfile(filepath):
            ## file is a regular file and is readable - get contents, closing
            ## the handle promptly instead of leaking it:
            fh = open(filepath, "r")
            try:
                filecontent = fh.read()
            finally:
                fh.close()
        else:
            if not isfile(filepath):
                ## file is not a regular file
                user_msg.append("The parameter file [%s] is not regular file - unable to read" % (filename,))
            else:
                ## file is not readable - error message
                user_msg.append("The parameter file [%s] could not be read - check permissions" % (filename,))
            ## display page listing the parameters of this function:
            return _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
                                                                                       action=action,
                                                                                       functionname=functionname,
                                                                                       user_msg=user_msg)
    else:
        ## file does not exist:
        user_msg.append("The parameter file [%s] does not exist - it will be created" % (filename,))
    ## make page body:
    body = websubmitadmin_templates.tmpl_configuredoctype_edit_functionparameter_file(doctype=doctype,
                                                                                      action=action,
                                                                                      function=functionname,
                                                                                      paramname=paramname,
                                                                                      paramfilename=filename,
                                                                                      paramfilecontent=filecontent,
                                                                                      user_msg=user_msg)
    return (title, body)
def _create_configure_doctype_submission_functions_edit_parameter_value_form(doctype,
                                                                             action,
                                                                             functionname,
                                                                             paramname,
                                                                             paramval="",
                                                                             user_msg=""):
    """Display a form for editing the value of a function parameter for a
    given document type.

    @param doctype: (string) the unique ID of a document type
    @param action: (string) the unique ID of an action
    @param functionname: (string) the function to which the parameter belongs
    @param paramname: (string) the name of the parameter to edit
    @param paramval: (string) the current value shown in the form; when empty,
        the value stored for this doctype is used
    @param user_msg: unused; kept for interface compatibility with the other
        form-building helpers
    @return: (tuple) containing 2 strings - (page-title, HTML page-body)
    """
    title = """Edit the value of the [%s] Parameter""" % (paramname,)
    ## get the parameter's value from the DB:
    paramval_res = get_value_of_parameter_for_doctype(doctype=doctype, parameter=paramname)
    if paramval_res is None:
        ## this parameter doesn't exist for this doctype - fall back to the
        ## parameter-listing page. (BUGFIX: this branch previously had no
        ## return, so it fell through, set paramval to "None" and overwrote
        ## the fallback page it had just built.)
        (title, body) = _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
                                                                                            action=action,
                                                                                            functionname=functionname)
        return (title, body)
    if paramval == "":
        ## use whatever retrieved paramval_res contains:
        paramval = str(paramval_res)
    body = websubmitadmin_templates.tmpl_configuredoctype_edit_functionparameter_value(doctype=doctype,
                                                                                       action=action,
                                                                                       function=functionname,
                                                                                       paramname=paramname,
                                                                                       paramval=paramval)
    return (title, body)
def _update_submissionfunction_parameter_value(doctype, action, functionname, paramname, paramval):
user_msg = []
try:
update_value_of_function_parameter_for_doctype(doctype=doctype, paramname=paramname, paramval=paramval)
user_msg.append("""The value of the parameter [%s] was updated for document type [%s]""" % (paramname, doctype))
except InvenioWebSubmitAdminWarningTooManyRows, e:
## multiple rows found for param - update not carried out
user_msg.append(str(e))
except InvenioWebSubmitAdminWarningNoRowsFound, e:
## no rows found - parameter does not exist for doctype, therefore no update
user_msg.append(str(e))
(title, body) = \
_create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype, action=action,
functionname=functionname, user_msg=user_msg)
return (title, body)
def perform_request_configure_doctype_submissionfunctions_parameters(doctype,
                                                                     action,
                                                                     functionname,
                                                                     functionstep,
                                                                     functionscore,
                                                                     paramname="",
                                                                     paramval="",
                                                                     editfunctionparametervalue="",
                                                                     editfunctionparametervaluecommit="",
                                                                     editfunctionparameterfile="",
                                                                     editfunctionparameterfilecommit="",
                                                                     paramfilename="",
                                                                     paramfilecontent=""):
    """Dispatch a request concerning the parameters of a submission function:
    edit/commit a parameter value, or edit/commit the file named by a
    parameter's value. Defaults to listing the function's parameters.

    @param doctype: (string) the unique ID of a document type
    @param action: (string) the unique ID of an action
    @param functionname: (string) the name of the function concerned
    @return: (tuple) containing 2 strings - (page-title, HTML page-body)
    """
    user_msg = []

    def _all_doctypes_page():
        ## show the list of all document types plus any accumulated warnings:
        all_doctypes = get_docid_docname_alldoctypes()
        page_body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        return ("Available WebSubmit Document Types", page_body)

    ## the doctype ID must identify exactly one document type:
    if doctype in ("", None):
        user_msg.append("""Unknown Document Type""")
        ## TODO : LOG ERROR
        return _all_doctypes_page()
    numrows_doctype = get_number_doctypes_docid(docid=doctype)
    if numrows_doctype > 1:
        ## there are multiple doctypes with this doctype ID:
        ## TODO : LOG ERROR
        user_msg.append("""Multiple document types identified by "%s" exist - cannot configure at this time.""" \
                        % (doctype,))
        return _all_doctypes_page()
    elif numrows_doctype == 0:
        ## this doctype does not seem to exist:
        user_msg.append("""The document type identified by "%s" doesn't exist - cannot configure at this time.""" \
                        % (doctype,))
        ## TODO : LOG ERROR
        return _all_doctypes_page()
    ## the submission must exist exactly once for this doctype:
    numrows_submission = get_number_submissions_doctype_action(doctype=doctype, action=action)
    if numrows_submission > 1:
        ## TODO : LOG ERROR
        user_msg.append("""The Submission "%s" seems to exist multiple times for the Document Type "%s" - cannot configure at this time.""" \
                        % (action, doctype))
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    elif numrows_submission == 0:
        user_msg.append("""The Submission "%s" doesn't exist for the "%s" Document Type - cannot configure at this time.""" \
                        % (action, doctype))
        ## TODO : LOG ERROR
        return _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    ## dispatch on the requested operation:
    if editfunctionparametervaluecommit not in ("", None):
        ## commit an update to a function parameter:
        return _update_submissionfunction_parameter_value(doctype=doctype, action=action, functionname=functionname,
                                                          paramname=paramname, paramval=paramval)
    if editfunctionparametervalue not in ("", None):
        ## display a form for editing the value of a parameter:
        return _create_configure_doctype_submission_functions_edit_parameter_value_form(doctype=doctype,
                                                                                        action=action,
                                                                                        functionname=functionname,
                                                                                        paramname=paramname,
                                                                                        paramval=paramval)
    if editfunctionparameterfile not in ("", None):
        ## display a form for editing the contents of the file named by the parameter's value:
        return _create_configure_doctype_submission_functions_edit_parameter_file_form(doctype=doctype,
                                                                                       action=action,
                                                                                       functionname=functionname,
                                                                                       paramname=paramname)
    if editfunctionparameterfilecommit not in ("", None):
        ## commit new file contents:
        return _update_submission_function_parameter_file(doctype=doctype, action=action, functionname=functionname,
                                                          paramname=paramname, paramfilecontent=paramfilecontent)
    ## default - display list of parameters for function:
    return _create_configure_doctype_submission_functions_list_parameters_form(doctype=doctype,
                                                                               action=action,
                                                                               functionname=functionname)
def perform_request_configure_doctype_submissionfunctions(doctype,
action,
moveupfunctionname="",
moveupfunctionstep="",
moveupfunctionscore="",
movedownfunctionname="",
movedownfunctionstep="",
movedownfunctionscore="",
movefromfunctionname="",
movefromfunctionstep="",
movefromfunctionscore="",
movetofunctionname="",
movetofunctionstep="",
movetofunctionscore="",
deletefunctionname="",
deletefunctionstep="",
deletefunctionscore="",
configuresubmissionaddfunction="",
configuresubmissionaddfunctioncommit="",
addfunctionname="",
addfunctionstep="",
addfunctionscore=""):
body = ""
user_msg = []
if addfunctionstep != "":
try:
addfunctionstep = str(wash_single_urlarg(urlarg=addfunctionstep, argreqdtype=int, argdefault=""))
except ValueError, e:
addfunctionstep = ""
if addfunctionscore != "":
try:
addfunctionscore = str(wash_single_urlarg(urlarg=addfunctionscore, argreqdtype=int, argdefault=""))
except ValueError, e:
addfunctionscore = ""
if deletefunctionstep != "":
try:
deletefunctionstep = str(wash_single_urlarg(urlarg=deletefunctionstep, argreqdtype=int, argdefault=""))
except ValueError, e:
deletefunctionstep = ""
if deletefunctionscore != "":
try:
deletefunctionscore = str(wash_single_urlarg(urlarg=deletefunctionscore, argreqdtype=int, argdefault=""))
except ValueError, e:
deletefunctionscore = ""
if movetofunctionstep != "":
try:
movetofunctionstep = str(wash_single_urlarg(urlarg=movetofunctionstep, argreqdtype=int, argdefault=""))
except ValueError, e:
movetofunctionstep = ""
if movetofunctionscore != "":
try:
movetofunctionscore = str(wash_single_urlarg(urlarg=movetofunctionscore, argreqdtype=int, argdefault=""))
except ValueError, e:
movetofunctionscore = ""
if moveupfunctionstep != "":
try:
moveupfunctionstep = str(wash_single_urlarg(urlarg=moveupfunctionstep, argreqdtype=int, argdefault=""))
except ValueError, e:
moveupfunctionstep = ""
if moveupfunctionscore != "":
try:
moveupfunctionscore = str(wash_single_urlarg(urlarg=moveupfunctionscore, argreqdtype=int, argdefault=""))
except ValueError, e:
moveupfunctionscore = ""
if movedownfunctionstep != "":
try:
movedownfunctionstep = str(wash_single_urlarg(urlarg=movedownfunctionstep, argreqdtype=int, argdefault=""))
except ValueError, e:
movedownfunctionstep = ""
if movedownfunctionscore != "":
try:
movedownfunctionscore = str(wash_single_urlarg(urlarg=movedownfunctionscore, argreqdtype=int, argdefault=""))
except ValueError, e:
movedownfunctionscore = ""
## ensure that there is only one doctype for this doctype ID - simply display all doctypes with warning if not
if doctype in ("", None):
user_msg.append("""Unknown Document Type""")
## TODO : LOG ERROR
all_doctypes = get_docid_docname_alldoctypes()
body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
title = "Available WebSubmit Document Types"
return (title, body)
numrows_doctype = get_number_doctypes_docid(docid=doctype)
if numrows_doctype > 1:
## there are multiple doctypes with this doctype ID:
## TODO : LOG ERROR
user_msg.append("""Multiple document types identified by "%s" exist - cannot configure at this time.""" \
% (doctype,))
all_doctypes = get_docid_docname_alldoctypes()
body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
title = "Available WebSubmit Document Types"
return (title, body)
elif numrows_doctype == 0:
## this doctype does not seem to exist:
user_msg.append("""The document type identified by "%s" doesn't exist - cannot configure at this time.""" \
% (doctype,))
## TODO : LOG ERROR
all_doctypes = get_docid_docname_alldoctypes()
body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
title = "Available WebSubmit Document Types"
return (title, body)
## ensure that this submission exists for this doctype:
numrows_submission = get_number_submissions_doctype_action(doctype=doctype, action=action)
if numrows_submission > 1:
## there are multiple submissions for this doctype/action ID:
## TODO : LOG ERROR
user_msg.append("""The Submission "%s" seems to exist multiple times for the Document Type "%s" - cannot configure at this time.""" \
% (action, doctype))
(title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
return (title, body)
elif numrows_submission == 0:
## this submission does not seem to exist for this doctype:
user_msg.append("""The Submission "%s" doesn't exist for the "%s" Document Type - cannot configure at this time.""" \
% (action, doctype))
## TODO : LOG ERROR
(title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
return (title, body)
## submission valid
if movefromfunctionname != "" and movefromfunctionstep != "" and movefromfunctionscore != "" and \
movetofunctionname != "" and movetofunctionstep != "" and movetofunctionscore != "":
## process moving the function by jumping it to another position
try:
move_submission_function_from_one_position_to_another_position(doctype=doctype, action=action,
movefuncname=movefromfunctionname,
movefuncfromstep=movefromfunctionstep,
movefuncfromscore=movefromfunctionscore,
movefunctostep=movetofunctionstep,
movefunctoscore=movetofunctionscore)
user_msg.append("""The function [%s] at step [%s], score [%s] was successfully moved."""\
% (movefromfunctionname, movefromfunctionstep, movefromfunctionscore))
except Exception, e:
## there was a problem
user_msg.append(str(e))
(title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype,
action=action,
user_msg=user_msg)
elif moveupfunctionname != "" and moveupfunctionstep != "" and moveupfunctionscore != "":
## process moving the function up one position
error_code = move_position_submissionfunction_up(doctype=doctype,
action=action,
function=moveupfunctionname,
funccurstep=moveupfunctionstep,
funccurscore=moveupfunctionscore)
if error_code == 0:
## success
user_msg.append("""The Function "%s" that was located at step %s, score %s, has been moved upwards""" \
% (moveupfunctionname, moveupfunctionstep, moveupfunctionscore))
else:
## could not move it
user_msg.append("""Unable to move the Function "%s" that is located at step %s, score %s""" \
% (moveupfunctionname, moveupfunctionstep, moveupfunctionscore))
(title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype,
action=action,
user_msg=user_msg)
elif movedownfunctionname != "" and movedownfunctionstep != "" and movedownfunctionscore != "":
## process moving the function down one position
error_code = move_position_submissionfunction_down(doctype=doctype,
action=action,
function=movedownfunctionname,
funccurstep=movedownfunctionstep,
funccurscore=movedownfunctionscore)
if error_code == 0:
## success
user_msg.append("""The Function "%s" that was located at step %s, score %s, has been moved downwards""" \
% (movedownfunctionname, movedownfunctionstep, movedownfunctionscore))
else:
## could not move it
user_msg.append("""Unable to move the Function "%s" that is located at step %s, score %s""" \
% (movedownfunctionname, movedownfunctionstep, movedownfunctionscore))
(title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype,
action=action,
user_msg=user_msg)
elif deletefunctionname != "" and deletefunctionstep != "" and deletefunctionscore != "":
## process deletion of function from the given position
(title, body) = _delete_submission_function(doctype=doctype, action=action, deletefunctionname=deletefunctionname,
deletefunctionstep=deletefunctionstep, deletefunctionscore=deletefunctionscore)
elif configuresubmissionaddfunction != "":
## display a form that allows the addition of a new WebSubmit function
(title, body) = _create_configure_doctype_submission_functions_add_function_form(doctype=doctype,
action=action)
elif configuresubmissionaddfunctioncommit != "":
## process the addition of the new WebSubmit function to the submission:
(title, body) = _add_function_to_submission(doctype=doctype, action=action, addfunctionname=addfunctionname,
addfunctionstep=addfunctionstep, addfunctionscore=addfunctionscore)
else:
## default - display functions for this submission
(title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype,
action=action,
movefromfunctionname=movefromfunctionname,
movefromfunctionstep=movefromfunctionstep,
movefromfunctionscore=movefromfunctionscore
)
return (title, body)
def _add_function_to_submission(doctype, action, addfunctionname, addfunctionstep, addfunctionscore):
"""Process the addition of a function to a submission.
The user can decide in which step and at which score to insert the function.
@param doctype: (string) the unique ID of a document type
@param action: (string) the unique ID of an action
@param addfunctionname: (string) the name of the function to be added to the submission
@param addfunctionstep: (integer) the step at which the function is to be added
@param addfunctionscore: (integer) the score at which the function is to be added
@return: a tuple containing 2 strings: (page-title, page-body)
"""
user_msg = []
if addfunctionname == "" or addfunctionstep == "" or addfunctionscore == "":
## invalid details!
user_msg.append("""Invalid function coordinates supplied!""")
(title, body) = _create_configure_doctype_submission_functions_add_function_form(doctype=doctype,
action=action,
user_msg=user_msg)
return (title, body)
try:
if int(addfunctionstep) < 1 or int(addfunctionscore) < 1:
## invalid details!
user_msg.append("""Invalid function step and/or score!""")
(title, body) = _create_configure_doctype_submission_functions_add_function_form(doctype=doctype,
action=action,
user_msg=user_msg)
return (title, body)
except ValueError:
user_msg.append("""Invalid function step and/or score!""")
(title, body) = _create_configure_doctype_submission_functions_add_function_form(doctype=doctype,
action=action,
user_msg=user_msg)
try:
insert_function_into_submission_at_step_and_score_then_regulate_scores_of_functions_in_step(doctype=doctype,
action=action,
function=addfunctionname,
step=addfunctionstep,
score=addfunctionscore)
except InvenioWebSubmitAdminWarningReferentialIntegrityViolation, e:
## Function didn't exist in WebSubmit! Not added to submission.
user_msg.append(str(e))
## TODO : LOG ERROR
(title, body) = _create_configure_doctype_submission_functions_add_function_form(doctype=doctype,
action=action,
addfunctionstep=addfunctionstep,
addfunctionscore=addfunctionscore,
user_msg=user_msg)
return (title, body)
except InvenioWebSubmitAdminWarningInsertFailed, e:
## insert failed - some functions within the step may have been corrupted!
user_msg.append(str(e))
## TODO : LOG ERROR
(title, body) = \
_create_configure_doctype_submission_functions_form(doctype=doctype, action=action, user_msg=user_msg)
return (title, body)
except InvenioWebSubmitAdminWarningDeleteFailed, e:
## when regulating the scores of functions within the step, could not delete some or all of the functions
## within the step that the function was added to. Some functions may have been lost!
user_msg.append(str(e))
## TODO : LOG ERROR
(title, body) = \
_create_configure_doctype_submission_functions_form(doctype=doctype, action=action, user_msg=user_msg)
return (title, body)
## Successfully added
user_msg.append("""The function [%s] has been added to submission [%s] at step [%s], score [%s]."""\
% (addfunctionname, "%s%s" % (action, doctype), addfunctionstep, addfunctionscore))
(title, body) = \
_create_configure_doctype_submission_functions_form(doctype=doctype, action=action, user_msg=user_msg)
return (title, body)
def _delete_submission_function(doctype, action, deletefunctionname, deletefunctionstep, deletefunctionscore):
    """Delete a submission function from a given submission. Re-order all functions below it (within the same step)
       to fill the gap left by the deleted function.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param deletefunctionname: (string) the name of the function to be deleted from the submission
       @param deletefunctionstep: (string) the step of the function to be deleted from the submission
       @param deletefunctionscore: (string) the score of the function to be deleted from the submission
       @return: tuple containing 2 strings: (page-title, page-body)
    """
    user_msg = []
    ## first, delete the function:
    try:
        delete_function_at_step_and_score_from_submission(doctype=doctype, action=action,
                                                          function=deletefunctionname, step=deletefunctionstep,
                                                          score=deletefunctionscore)
    except InvenioWebSubmitAdminWarningDeleteFailed, e:
        ## unable to delete function - error message and return
        user_msg.append("""Unable to delete function [%s] at step [%s], score [%s] from submission [%s]""" \
                        % (deletefunctionname, deletefunctionstep, deletefunctionscore, "%s%s" % (action, doctype)))
        ## TODO : LOG ERROR
        (title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype, action=action, user_msg=user_msg)
        return (title, body)
    ## now, correct the scores of all functions in the step from which a function was just deleted:
    ## (re-normalises remaining scores to ascending multiples of 10)
    try:
        regulate_score_of_all_functions_in_step_to_ascending_multiples_of_10_for_submission(doctype=doctype,
                                                                                            action=action,
                                                                                            step=deletefunctionstep)
    except InvenioWebSubmitAdminWarningDeleteFailed, e:
        ## couldnt delete the functions before reordering them
        ## NOTE(review): the deletion itself succeeded here - only the re-ordering
        ## step failed, so some scores in the step may now be irregular.
        user_msg.append("""Deleted function [%s] at step [%s], score [%s] from submission [%s], but could not re-order""" \
                        """ scores of remaining functions within step [%s]""" \
                        % (deletefunctionname, deletefunctionstep, deletefunctionscore,
                           "%s%s" % (action, doctype), deletefunctionstep))
        ## TODO : LOG ERROR
        (title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype, action=action, user_msg=user_msg)
        return (title, body)
    ## update submission "last-modification" date:
    update_modification_date_for_submission(doctype=doctype, action=action)
    ## success message:
    user_msg.append("""Successfully deleted function [%s] at step [%s], score [%s] from submission [%s]""" \
                    % (deletefunctionname, deletefunctionstep, deletefunctionscore, "%s%s" % (action, doctype)))
    ## TODO : LOG function Deletion
    (title, body) = _create_configure_doctype_submission_functions_form(doctype=doctype, action=action, user_msg=user_msg)
    return (title, body)
def perform_request_configure_doctype_submissionpage_preview(doctype, action, pagenum):
    """Display a preview of a Submission Page and its fields.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param pagenum: (integer) the number of the submission page to be previewed
       @return: a tuple of 2 elements: (page-title, page-body)
    """
    body = ""
    user_msg = []
    ## coerce pagenum to a string before washing
    ## NOTE(review): str() does not raise ValueError, so this except clause
    ## appears to be dead - confirm before removing.
    try:
        pagenum = str(pagenum)
    except ValueError:
        pagenum = ""
    if pagenum != "":
        ## wash pagenum: must be a string representation of an integer
        try:
            pagenum = str(wash_single_urlarg(urlarg=pagenum, argreqdtype=int, argdefault=""))
        except ValueError, e:
            pagenum = ""
    ## ensure that the page number for this submission is valid:
    num_pages_submission = get_numbersubmissionpages_doctype_action(doctype=doctype, action=action)
    try:
        if not (int(pagenum) > 0 and int(pagenum) <= num_pages_submission):
            user_msg.append("Invalid page number - out of range")
            (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype, action=action, user_msg=user_msg)
            return (title, body)
    except ValueError:
        ## invalid page number
        user_msg.append("Invalid page number - must be an integer value!")
        (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype, action=action, user_msg=user_msg)
        return (title, body)
    ## get details of all fields on submission page:
    fields = get_details_and_description_of_all_fields_on_submissionpage(doctype=doctype, action=action, pagenum=pagenum)
    ## ensure all values for each field are strings (as expected by the template):
    string_fields = []
    for field in fields:
        string_fields.append(stringify_list_elements(field))
    title = """A preview of Page %s of the %s Submission""" % (pagenum, "%s%s" % (action, doctype))
    body = websubmitadmin_templates.tmpl_configuredoctype_display_submissionpage_preview(doctype=doctype,
                                                                                         action=action,
                                                                                         pagenum=pagenum,
                                                                                         fields=string_fields)
    return (title, body)
def perform_request_configure_doctype_submissionpage_elements(doctype, action, pagenum, movefieldfromposn="",
                                                              movefieldtoposn="", deletefieldposn="", editfieldposn="",
                                                              editfieldposncommit="", fieldname="", fieldtext="", fieldlevel="",
                                                              fieldshortdesc="", fieldcheck="", addfield="", addfieldcommit=""):
    """Process requests relating to the elements of a particular submission page.
       Washes all CGI arguments, validates the doctype/submission/page, then
       dispatches to the relevant edit/move/add/delete/list handler.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param pagenum: (integer) the number of the submission page being configured
       @return: a tuple containing 2 strings: (page-title, page-body)
    """
    body = ""
    user_msg = []
    ## --- wash all arguments; invalid values are reset to a safe default ---
    ## NOTE(review): str() does not raise ValueError - this guard looks dead;
    ## confirm before removing.
    try:
        pagenum = str(pagenum)
    except ValueError:
        pagenum = ""
    if pagenum != "":
        try:
            pagenum = str(wash_single_urlarg(urlarg=pagenum, argreqdtype=int, argdefault=""))
        except ValueError, e:
            pagenum = ""
    if fieldname != "":
        ## element name: 1-15 alphanumeric/underscore characters
        try:
            fieldname = wash_single_urlarg(urlarg=fieldname, argreqdtype=str, argdefault="", maxstrlen=15, minstrlen=1)
            if string_is_alphanumeric_including_underscore(txtstring=fieldname) == 0:
                fieldname = ""
        except ValueError, e:
            fieldname = ""
    if fieldtext != "":
        ## free-text label displayed alongside the field
        try:
            fieldtext = wash_single_urlarg(urlarg=fieldtext, argreqdtype=str, argdefault="")
        except ValueError, e:
            fieldtext = ""
    if fieldlevel != "":
        ## field level: must end up as "M" (mandatory) or "O" (optional);
        ## anything else defaults to "O"
        try:
            fieldlevel = wash_single_urlarg(urlarg=fieldlevel, argreqdtype=str, argdefault="O", maxstrlen=1, minstrlen=1)
            if string_is_alphanumeric_including_underscore(txtstring=fieldlevel) == 0:
                fieldlevel = "O"
            if fieldlevel not in ("m", "M", "o", "O"):
                fieldlevel = "O"
            fieldlevel = fieldlevel.upper()
        except ValueError, e:
            fieldlevel = "O"
    if fieldshortdesc != "":
        try:
            fieldshortdesc = wash_single_urlarg(urlarg=fieldshortdesc, argreqdtype=str, argdefault="")
        except ValueError, e:
            fieldshortdesc = ""
    if fieldcheck != "":
        ## JavaScript check name: 1-15 alphanumeric/underscore characters
        try:
            fieldcheck = wash_single_urlarg(urlarg=fieldcheck, argreqdtype=str, argdefault="", maxstrlen=15, minstrlen=1)
            if string_is_alphanumeric_including_underscore(txtstring=fieldcheck) == 0:
                fieldcheck = ""
        except ValueError, e:
            fieldcheck = ""
    ## positional arguments: all must be string representations of integers
    if editfieldposn != "":
        try:
            editfieldposn = str(wash_single_urlarg(urlarg=editfieldposn, argreqdtype=int, argdefault=""))
        except ValueError, e:
            editfieldposn = ""
    if deletefieldposn != "":
        try:
            deletefieldposn = str(wash_single_urlarg(urlarg=deletefieldposn, argreqdtype=int, argdefault=""))
        except ValueError, e:
            deletefieldposn = ""
    if movefieldfromposn != "":
        try:
            movefieldfromposn = str(wash_single_urlarg(urlarg=movefieldfromposn, argreqdtype=int, argdefault=""))
        except ValueError, e:
            movefieldfromposn = ""
    if movefieldtoposn != "":
        try:
            movefieldtoposn = str(wash_single_urlarg(urlarg=movefieldtoposn, argreqdtype=int, argdefault=""))
        except ValueError, e:
            movefieldtoposn = ""
    ## ensure that there is only one doctype for this doctype ID - simply display all doctypes with warning if not
    if doctype in ("", None):
        user_msg.append("""Unknown Document Type""")
        ## TODO : LOG ERROR
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    numrows_doctype = get_number_doctypes_docid(docid=doctype)
    if numrows_doctype > 1:
        ## there are multiple doctypes with this doctype ID:
        ## TODO : LOG ERROR
        user_msg.append("""Multiple document types identified by "%s" exist - cannot configure at this time.""" \
                        % (doctype,))
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    elif numrows_doctype == 0:
        ## this doctype does not seem to exist:
        user_msg.append("""The document type identified by "%s" doesn't exist - cannot configure at this time.""" \
                        % (doctype,))
        ## TODO : LOG ERROR
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    ## ensure that this submission exists for this doctype:
    numrows_submission = get_number_submissions_doctype_action(doctype=doctype, action=action)
    if numrows_submission > 1:
        ## there are multiple submissions for this doctype/action ID:
        ## TODO : LOG ERROR
        user_msg.append("""The Submission "%s" seems to exist multiple times for the Document Type "%s" - cannot configure at this time.""" \
                        % (action, doctype))
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
        return (title, body)
    elif numrows_submission == 0:
        ## this submission does not seem to exist for this doctype:
        user_msg.append("""The Submission "%s" doesn't exist for the "%s" Document Type - cannot configure at this time.""" \
                        % (action, doctype))
        ## TODO : LOG ERROR
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
        return (title, body)
    ## ensure that the page number for this submission is valid:
    num_pages_submission = get_numbersubmissionpages_doctype_action(doctype=doctype, action=action)
    try:
        if not (int(pagenum) > 0 and int(pagenum) <= num_pages_submission):
            user_msg.append("Invalid page number - out of range")
            (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype, action=action, user_msg=user_msg)
            return (title, body)
    except ValueError:
        ## invalid page number
        user_msg.append("Invalid page number - must be an integer value!")
        (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype, action=action, user_msg=user_msg)
        return (title, body)
    ## submission valid - dispatch on which action flag was submitted:
    if editfieldposn != "" and editfieldposncommit == "":
        ## display form for editing field
        (title, body) = _configure_doctype_edit_field_on_submissionpage_display_field_details(doctype=doctype, action=action,
                                                                                              pagenum=pagenum, fieldposn=editfieldposn)
    elif editfieldposn != "" and editfieldposncommit != "":
        ## commit changes to element
        (title, body) = _configure_doctype_edit_field_on_submissionpage(doctype=doctype, action=action,
                                                                        pagenum=pagenum, fieldposn=editfieldposn, fieldtext=fieldtext,
                                                                        fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc, fieldcheck=fieldcheck)
    elif movefieldfromposn != "" and movefieldtoposn != "":
        ## move a field
        (title, body) = _configure_doctype_move_field_on_submissionpage(doctype=doctype,
                                                                        action=action, pagenum=pagenum, movefieldfromposn=movefieldfromposn,
                                                                        movefieldtoposn=movefieldtoposn)
    elif addfield != "":
        ## request to add a new field to a page - display form
        (title, body) = _configure_doctype_add_field_to_submissionpage_display_form(doctype=doctype, action=action, pagenum=pagenum)
    elif addfieldcommit != "":
        ## commit a new field to the page
        (title, body) = _configure_doctype_add_field_to_submissionpage(doctype=doctype, action=action,
                                                                       pagenum=pagenum, fieldname=fieldname, fieldtext=fieldtext,
                                                                       fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc, fieldcheck=fieldcheck)
    elif deletefieldposn != "":
        ## user wishes to delete a field from the page:
        (title, body) = _configure_doctype_delete_field_from_submissionpage(doctype=doctype,
                                                                            action=action, pagenum=pagenum, fieldnum=deletefieldposn)
    else:
        ## default visit to page - list its elements:
        (title, body) = _create_configure_doctype_submission_page_elements_form(doctype=doctype, action=action,
                                                                                pagenum=pagenum, movefieldfromposn=movefieldfromposn)
    return (title, body)
def stringify_list_elements(elementslist):
    """Return a new list in which every element of elementslist has been
       converted to its string representation via str().
       @param elementslist: (list) elements of any type
       @return: (list) of strings
    """
    return [str(item) for item in elementslist]
def _configure_doctype_edit_field_on_submissionpage(doctype, action, pagenum, fieldposn,
                                                    fieldtext, fieldlevel, fieldshortdesc, fieldcheck):
    """Perform an update to the details of a field on a submission page.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param pagenum: (integer) the number of the page on which the element to be updated is found
       @param fieldposn: (integer) the numeric position of the field to be editied
       @param fieldtext: (string) the text label displayed with a field
       @param fieldlevel: (char) M or O (whether the field is mandatory or optional)
       @param fieldshortdesc: (string) the short description of a field
       @param fieldcheck: (string) the name of a JavaScript check to be applied to a field
       @return: a tuple containing 2 strings - (page-title, page-body)
    """
    user_msg = []
    if fieldcheck not in ("", None):
        ## a check name was supplied - ensure check exists before saving:
        checkres = get_number_jschecks_with_chname(chname=fieldcheck)
        if checkres < 1:
            ## unknown check - re-display the edit form without saving
            user_msg.append("The Check '%s' does not exist in WebSubmit - changes to field not saved" % (fieldcheck,))
            (title, body) = _configure_doctype_edit_field_on_submissionpage_display_field_details(doctype=doctype, action=action,
                                                                                                  pagenum=pagenum, fieldposn=fieldposn,
                                                                                                  fieldtext=fieldtext, fieldlevel=fieldlevel,
                                                                                                  fieldshortdesc=fieldshortdesc, user_msg=user_msg)
            return (title, body)
    try:
        update_details_of_a_field_on_a_submissionpage(doctype=doctype, action=action, pagenum=pagenum, fieldposn=fieldposn,
                                                      fieldtext=fieldtext, fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc,
                                                      fieldcheck=fieldcheck)
        user_msg.append("The details of the field at position %s have been updated successfully" % (fieldposn,))
        update_modification_date_for_submission(doctype=doctype, action=action)
    except InvenioWebSubmitAdminWarningTooManyRows, e:
        ## multiple rows found at page position - not safe to edit:
        user_msg.append("Unable to update details of field at position %s on submission page %s - multiple fields found at this position" \
                        % (fieldposn, pagenum))
        ## TODO : LOG WARNING
    except InvenioWebSubmitAdminWarningNoRowsFound, e:
        ## field not found - cannot edit
        user_msg.append("Unable to update details of field at position %s on submission page %s - field doesn't seem to exist there!" \
                        % (fieldposn, pagenum))
        ## TODO : LOG WARNING
    ## in all cases (success or warning), fall through to the elements listing:
    (title, body) = _create_configure_doctype_submission_page_elements_form(doctype=doctype, action=action, pagenum=pagenum, user_msg=user_msg)
    return (title, body)
def _configure_doctype_edit_field_on_submissionpage_display_field_details(doctype, action, pagenum, fieldposn,
                                                                          fieldtext=None, fieldlevel=None, fieldshortdesc=None,
                                                                          fieldcheck=None, user_msg=""):
    """Display a form used to edit a field on a submission page.
       Optional keyword values (fieldtext, fieldlevel, ...), when not None,
       override the stored values - this allows re-display of user input
       after a failed save attempt.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param pagenum: (integer) the number of the page on which the element to be updated is found
       @param fieldposn: (integer) the numeric position of the field to be editied
       @param fieldtext: (string) the text label displayed with a field
       @param fieldlevel: (char) M or O (whether the field is mandatory or optional)
       @param fieldshortdesc: (string) the short description of a field
       @param fieldcheck: (string) the name of a JavaScript check to be applied to a field
       @param user_msg: (list of strings, or string) any warning/error message to be displayed to the user
       @return: a tuple containing 2 strings (page-title, page-body)
    """
    ## normalise user_msg to a list:
    if type(user_msg) not in (list, tuple) or user_msg == "":
        user_msg = []
    ## get a list of all check names, each wrapped in a 1-tuple for the template:
    checks_res = get_all_jscheck_names()
    allchecks=[]
    for check in checks_res:
        allchecks.append((check,))
    ## get the details for the field to be edited:
    fielddets = get_details_of_field_at_positionx_on_submissionpage(doctype=doctype, action=action, pagenum=pagenum, fieldposition=fieldposn)
    if len(fielddets) < 1:
        ## no such field - fall back to the page-elements listing:
        (title, body) = _create_configure_doctype_submission_page_elements_form(doctype=doctype, action=action, pagenum=pagenum)
        return (title, body)
    ## unpack stored field details; caller-supplied values take precedence:
    fieldname = str(fielddets[2])
    if fieldtext is not None:
        fieldtext = str(fieldtext)
    else:
        fieldtext = str(fielddets[3])
    if fieldlevel is not None:
        fieldlevel = str(fieldlevel)
    else:
        fieldlevel = str(fielddets[4])
    if fieldshortdesc is not None:
        fieldshortdesc = str(fieldshortdesc)
    else:
        fieldshortdesc = str(fielddets[5])
    if fieldcheck is not None:
        fieldcheck = str(fieldcheck)
    else:
        fieldcheck = str(fielddets[6])
    ## creation and last-modification dates of the field:
    cd = str(fielddets[7])
    md = str(fielddets[8])
    title = """Edit the %(fieldname)s field as it appears at position %(fieldnum)s on Page %(pagenum)s of the %(submission)s Submission""" \
            % { 'fieldname' : fieldname, 'fieldnum' : fieldposn, 'pagenum' : pagenum, 'submission' : "%s%s" % (action, doctype) }
    body = websubmitadmin_templates.tmpl_configuredoctype_edit_submissionfield(doctype=doctype,
                                                                               action=action,
                                                                               pagenum=pagenum,
                                                                               fieldnum=fieldposn,
                                                                               fieldname=fieldname,
                                                                               fieldtext=fieldtext,
                                                                               fieldlevel=fieldlevel,
                                                                               fieldshortdesc=fieldshortdesc,
                                                                               fieldcheck=fieldcheck,
                                                                               cd=cd,
                                                                               md=md,
                                                                               allchecks=allchecks,
                                                                               user_msg=user_msg)
    return (title, body)
def _configure_doctype_add_field_to_submissionpage(doctype, action, pagenum, fieldname="",
                                                   fieldtext="", fieldlevel="", fieldshortdesc="", fieldcheck=""):
    """Add a field to a submission page.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param pagenum: (integer) the number of the page on which the element to be updated is found
       @param fieldname: (string) the name of the field to be added to the page
       @param fieldtext: (string) the text label displayed with a field
       @param fieldlevel: (char) M or O (whether the field is mandatory or optional)
       @param fieldshortdesc: (string) the short description of a field
       @param fieldcheck: (string) the name of a JavaScript check to be applied to a field
       @return: a tuple containing 2 strings - (page-title, page-body)
    """
    user_msg = []
    ## ensure that a field "fieldname" actually exists:
    if fieldname == "":
        ## the field to be added has no element description in the WebSubmit DB - cannot add
        user_msg.append("""The field that you have chosen to add does not seem to exist in WebSubmit - cannot add""")
        (title, body) = _configure_doctype_add_field_to_submissionpage_display_form(doctype, action, pagenum,
                                                                                    fieldtext=fieldtext,
                                                                                    fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc,
                                                                                    fieldcheck=fieldcheck, user_msg=user_msg)
        return (title, body)
    numelements_elname = get_number_elements_with_elname(elname=fieldname)
    if numelements_elname < 1:
        ## the field to be added has no element description in the WebSubmit DB - cannot add
        user_msg.append("""The field that you have chosen to add (%s) does not seem to exist in WebSubmit - cannot add""" % (fieldname,))
        (title, body) = _configure_doctype_add_field_to_submissionpage_display_form(doctype, action, pagenum,
                                                                                    fieldtext=fieldtext,
                                                                                    fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc,
                                                                                    fieldcheck=fieldcheck, user_msg=user_msg)
        return (title, body)
    ## if fieldcheck has been provided, ensure that it is a valid check name:
    if fieldcheck not in ("", None):
        ## ensure check exists:
        checkres = get_number_jschecks_with_chname(chname=fieldcheck)
        if checkres < 1:
            ## unknown check - re-display the add-field form (without the bad check):
            user_msg.append("The Check '%s' does not exist in WebSubmit - new field not saved to page" % (fieldcheck,))
            (title, body) = _configure_doctype_add_field_to_submissionpage_display_form(doctype, action, pagenum,
                                                                                        fieldname=fieldname, fieldtext=fieldtext,
                                                                                        fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc,
                                                                                        user_msg=user_msg)
            return (title, body)
    ## now add the new field to the page:
    try:
        insert_field_onto_submissionpage(doctype=doctype, action=action, pagenum=pagenum, fieldname=fieldname, fieldtext=fieldtext,
                                         fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc, fieldcheck=fieldcheck)
        ## success - show the page-elements listing with a confirmation message:
        user_msg.append("""Successfully added the field "%s" to the last position on page %s of submission %s""" \
                        % (fieldname, pagenum, "%s%s" % (action, doctype)))
        update_modification_date_for_submission(doctype=doctype, action=action)
        (title, body) = _create_configure_doctype_submission_page_elements_form(doctype=doctype, action=action, pagenum=pagenum, user_msg=user_msg)
    except InvenioWebSubmitAdminWarningInsertFailed, e:
        ## the insert of the new field failed for some reason
        ## TODO : LOG ERROR
        user_msg.append("""Couldn't add the field "%s" to page %s of submission %s - please try again""" \
                        % (fieldname, pagenum, "%s%s" % (action, doctype)))
        (title, body) = _configure_doctype_add_field_to_submissionpage_display_form(doctype, action, pagenum,
                                                                                    fieldname=fieldname, fieldtext=fieldtext,
                                                                                    fieldlevel=fieldlevel, fieldshortdesc=fieldshortdesc,
                                                                                    fieldcheck=fieldcheck, user_msg=user_msg)
    return (title, body)
def _configure_doctype_add_field_to_submissionpage_display_form(doctype, action, pagenum, fieldname="", fieldtext="",
                                                                fieldlevel="", fieldshortdesc="", fieldcheck="", user_msg=""):
    """Display the form used to add a new field to a submission page.
       @param doctype: (string) the unique ID of a document type
       @param action: (string) the unique ID of an action
       @param pagenum: (integer) the number of the page onto which the field is to be added
       @param fieldname: (string) the name of the field to pre-fill in the form
       @param fieldtext: (string) the text label displayed with the field
       @param fieldlevel: (char) M or O (mandatory or optional)
       @param fieldshortdesc: (string) the short description of the field
       @param fieldcheck: (string) the name of a JavaScript check for the field
       @param user_msg: (list of strings, or string) any warning/error message for the user
       @return: a tuple containing 2 strings (page-title, page-body)
    """
    title = """Add a Field to Page %(pagenum)s of the %(submission)s Submission""" \
            % { 'pagenum' : pagenum, 'submission' : "%s%s" % (action, doctype) }
    ## normalise user_msg into a list:
    if type(user_msg) not in (list, tuple) or user_msg == "":
        user_msg = []
    ## wrap each JavaScript check name and each WebSubmit element name in a
    ## 1-tuple, as the template expects:
    allchecks = [(chname,) for chname in get_all_jscheck_names()]
    allelements = [(elname,) for elname in get_all_element_names()]
    ## build the form:
    body = websubmitadmin_templates.tmpl_configuredoctype_add_submissionfield(doctype=doctype,
                                                                              action=action,
                                                                              pagenum=pagenum,
                                                                              fieldname=fieldname,
                                                                              fieldtext=fieldtext,
                                                                              fieldlevel=fieldlevel,
                                                                              fieldshortdesc=fieldshortdesc,
                                                                              fieldcheck=fieldcheck,
                                                                              allchecks=allchecks,
                                                                              allelements=allelements,
                                                                              user_msg=user_msg)
    return (title, body)
def _configure_doctype_move_field_on_submissionpage(doctype, action, pagenum, movefieldfromposn, movefieldtoposn):
user_msg = []
_ = gettext_set_language(CFG_SITE_LANG)
movefield_res = move_field_on_submissionpage_from_positionx_to_positiony(doctype=doctype, action=action, pagenum=pagenum,
movefieldfrom=movefieldfromposn, movefieldto=movefieldtoposn)
if movefield_res == 1:
## invalid field numbers
try:
raise InvenioWebSubmitWarning(_('Unable to move field at position %s to position %s on page %s of submission \'%s%s\' - Invalid Field Position Numbers') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_INVALID_FIELD_NUMBERS_SUPPLIED_WHEN_TRYING_TO_MOVE_FIELD_ON_SUBMISSION_PAGE', \
#movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype)))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - field position numbers invalid""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
elif movefield_res == 2:
## failed to swap 2 fields - couldn't move field1 to temp position
try:
raise InvenioWebSubmitWarning(_('Unable to swap field at position %s with field at position %s on page %s of submission %s - could not move field at position %s to temporary field location') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_UNABLE_TO_SWAP_TWO_FIELDS_ON_SUBMISSION_PAGE_COULDNT_MOVE_FIELD1_TO_TEMP_POSITION', \
#movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype)))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
elif movefield_res == 3:
## failed to swap 2 fields on submission page - couldn't move field2 to field1 position
try:
raise InvenioWebSubmitWarning(_('Unable to swap field at position %s with field at position %s on page %s of submission %s - could not move field at position %s to position %s. Please ask Admin to check that a field was not stranded in a temporary position') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_UNABLE_TO_SWAP_TWO_FIELDS_ON_SUBMISSION_PAGE_COULDNT_MOVE_FIELD2_TO_FIELD1_POSITION', \
#movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype), movefieldtoposn, movefieldfromposn))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - See Admin if field order is broken""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
elif movefield_res == 4:
## failed to swap 2 fields in submission page - couldnt swap field at temp position to field2 position
try:
raise InvenioWebSubmitWarning(_('Unable to swap field at position %s with field at position %s on page %s of submission %s - could not move field that was located at position %s to position %s from temporary position. Field is now stranded in temporary position and must be corrected manually by an Admin') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldfromposn, movefieldtoposn))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_UNABLE_TO_SWAP_TWO_FIELDS_ON_SUBMISSION_PAGE_COULDNT_MOVE_FIELD1_TO_POSITION_FIELD2_FROM_TEMPORARY_POSITION', \
#movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype), movefieldfromposn, movefieldtoposn))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - Field-order is now broken and must be corrected by Admin""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
elif movefield_res == 5:
## failed to decrement the position of all fields below the field that was moved to a temp position
try:
raise InvenioWebSubmitWarning(_('Unable to move field at position %s to position %s on page %s of submission %s - could not decrement the position of the fields below position %s. Tried to recover - please check that field ordering is not broken') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldfromposn))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_UNABLE_TO_MOVE_FIELD_TO_NEW_POSITION_ON_SUBMISSION_PAGE_COULDNT_DECREMENT_POSITION_OF_FIELDS_BELOW_FIELD1', \
#movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype), movefieldfromposn))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - See Admin if field-order is broken""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
elif movefield_res == 6:
## failed to increment position of fields in and below position into which 'movefromfieldposn' is to be inserted
try:
raise InvenioWebSubmitWarning(_('Unable to move field at position %s to position %s on page %s of submission %s%s - could not increment the position of the fields at and below position %s. The field that was at position %s is now stranded in a temporary position.') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype, movefieldtoposn, movefieldfromposn))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_UNABLE_TO_MOVE_FIELD_TO_NEW_POSITION_ON_SUBMISSION_PAGE_COULDNT_INCREMENT_POSITION_OF_FIELDS_AT_AND_BELOW_FIELD2', \
#movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype), movefieldtoposn, movefieldfromposn))
user_msg.append("""Unable to move field from position %s to position %s on page %s of submission %s%s - Field-order is now broken and must be corrected by Admin""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
else:
## successful update:
try:
raise InvenioWebSubmitWarning(_('Moved field from position %s to position %s on page %s of submission \'%s%s\'.') % (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_MOVED_FIELD_ON_SUBMISSION_PAGE', movefieldfromposn, movefieldtoposn, pagenum, "%s%s" % (action, doctype)))
user_msg.append("""Successfully moved field from position %s to position %s on page %s of submission %s%s""" \
% (movefieldfromposn, movefieldtoposn, pagenum, action, doctype))
(title, body) = _create_configure_doctype_submission_page_elements_form(doctype=doctype, action=action, pagenum=pagenum, user_msg=user_msg)
return (title, body)
def _configure_doctype_delete_field_from_submissionpage(doctype, action, pagenum, fieldnum):
"""Delete a field from a submission page"""
_ = gettext_set_language(CFG_SITE_LANG)
user_msg = []
del_res = delete_a_field_from_submissionpage_then_reorder_fields_below_to_fill_vacant_position(doctype=doctype,
action=action,
pagenum=pagenum,
fieldposn=fieldnum)
if del_res == 1:
try:
raise InvenioWebSubmitWarning(_('Unable to delete field at position %s from page %s of submission \'%s\'') % (fieldnum, pagenum, action, doctype))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_UNABLE_TO_DELETE_FIELD_FROM_SUBMISSION_PAGE', fieldnum, pagenum, "%s%s" % (action, doctype)))
user_msg.append("Unable to delete field at position %s from page number %s of submission %s%s" % (fieldnum, pagenum, action, doctype))
else:
## deletion was OK
user_msg.append("Field deleted")
try:
raise InvenioWebSubmitWarning(_('Unable to delete field at position %s from page %s of submission \'%s%s\'') % (fieldnum, pagenum, action, doctype))
except InvenioWebSubmitWarning, exc:
register_exception(stream='warning')
#warnings.append(exc.message)
#warnings.append(('WRN_WEBSUBMITADMIN_DELETED_FIELD_FROM_SUBMISSION_PAGE', fieldnum, pagenum, "%s%s" % (action, doctype)))
(title, body) = _create_configure_doctype_submission_page_elements_form(doctype=doctype, action=action, pagenum=pagenum, user_msg=user_msg)
return (title, body)
def _create_configure_doctype_submission_page_elements_form(doctype, action, pagenum, movefieldfromposn="", user_msg=""):
    """Build the page (title + body) listing all of the submission elements
       found on one page of a submission of a document type.
       @param doctype: (string) the unique ID of the document type.
       @param action: (string) the unique name/ID of the action.
       @param pagenum: (integer) the number of the submission page to list.
       @param movefieldfromposn: (string/integer) position of a field currently
        marked for moving, if any.
       @param user_msg: (string, or list) message(s) to show the user.
       @return: a tuple containing 2 strings - the page title and the page body.
    """
    title = """Submission Elements found on Page %s of the "%s" Submission of the "%s" Document Type:"""\
            % (pagenum, action, doctype)
    ## fetch the raw element rows for this page and stringify each one so
    ## that the template can display them:
    raw_page_elements = get_details_allsubmissionfields_on_submission_page(doctype=doctype, action=action, pagenum=pagenum)
    page_elements = [stringify_list_elements(raw_element) for raw_element in raw_page_elements]
    body = websubmitadmin_templates.tmpl_configuredoctype_list_submissionelements(doctype=doctype,
                                                                                  action=action,
                                                                                  pagenum=pagenum,
                                                                                  page_elements=page_elements,
                                                                                  movefieldfromposn=movefieldfromposn,
                                                                                  user_msg=user_msg)
    return (title, body)
def perform_request_configure_doctype_submissionpages(doctype,
                                                      action,
                                                      pagenum="",
                                                      movepage="",
                                                      movepagedirection="",
                                                      deletepage="",
                                                      deletepageconfirm="",
                                                      addpage=""):
    """Process requests relating to the submission pages of a doctype/submission.

       Depending on which of the CGI "command" parameters is non-empty, this
       either adds a new page, moves a page up/down, deletes a page (after a
       confirmation round-trip), or - by default - lists the submission's pages.

       @param doctype: (string) the unique ID of the document type.
       @param action: (string) the unique name/ID of the action (submission).
       @param pagenum: (string) the number of the page acted upon; coerced to int.
       @param movepage: (string) non-empty when a page-move was requested.
       @param movepagedirection: (string) direction of the move ("up"/"down").
       @param deletepage: (string) non-empty when a page-delete was requested.
       @param deletepageconfirm: (string) non-empty when the user confirmed the delete.
       @param addpage: (string) non-empty when a new page is to be appended.
       @return: a tuple containing 2 strings - the page title and the page body.
    """
    body = ""
    user_msg = []
    ## pagenum arrives as a CGI string; coerce it to int, blanking it when invalid:
    try:
        pagenum = int(pagenum)
    except ValueError:
        pagenum = ""
    ## ensure that there is only one doctype for this doctype ID - simply display all doctypes with warning if not
    if doctype in ("", None):
        user_msg.append("""Unknown Document Type""")
        ## TODO : LOG ERROR
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    numrows_doctype = get_number_doctypes_docid(docid=doctype)
    if numrows_doctype > 1:
        ## there are multiple doctypes with this doctype ID:
        ## TODO : LOG ERROR
        user_msg.append("""Multiple document types identified by "%s" exist - cannot configure at this time.""" \
                        % (doctype,))
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    elif numrows_doctype == 0:
        ## this doctype does not seem to exist:
        user_msg.append("""The document type identified by "%s" doesn't exist - cannot configure at this time.""" \
                        % (doctype,))
        ## TODO : LOG ERROR
        all_doctypes = get_docid_docname_alldoctypes()
        body = websubmitadmin_templates.tmpl_display_alldoctypes(doctypes=all_doctypes, user_msg=user_msg)
        title = "Available WebSubmit Document Types"
        return (title, body)
    ## ensure that this submission exists for this doctype:
    numrows_submission = get_number_submissions_doctype_action(doctype=doctype, action=action)
    if numrows_submission > 1:
        ## there are multiple submissions for this doctype/action ID:
        ## TODO : LOG ERROR
        user_msg.append("""The Submission "%s" seems to exist multiple times for the Document Type "%s" - cannot configure at this time.""" \
                        % (action, doctype))
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
        return (title, body)
    elif numrows_submission == 0:
        ## this submission does not seem to exist for this doctype:
        user_msg.append("""The Submission "%s" doesn't exist for the "%s" Document Type - cannot configure at this time.""" \
                        % (action, doctype))
        ## TODO : LOG ERROR
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
        return (title, body)
    ## submission valid
    ## dispatch on whichever command parameter is non-empty:
    if addpage != "":
        ## add a new page to a submission:
        error_code = add_submission_page_doctype_action(doctype=doctype, action=action)
        if error_code == 0:
            ## success
            user_msg.append("""A new Submission Page has been added into the last position""")
        else:
            ## could not move it
            user_msg.append("""Unable to add a new Submission Page""")
        (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                        action=action,
                                                                        user_msg=user_msg)
    elif movepage != "":
        ## user wants to move a page upwards in the order
        (title, body) = _configure_doctype_move_submission_page(doctype=doctype,
                                                                action=action, pagenum=pagenum, direction=movepagedirection)
    elif deletepage != "":
        ## user wants to delete a page:
        if deletepageconfirm != "":
            ## confirmation of deletion has been provided - proceed
            (title, body) = _configure_doctype_delete_submission_page(doctype=doctype,
                                                                      action=action, pagenum=pagenum)
        else:
            ## user has not yet confirmed the deletion of a page - prompt for confirmation
            (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                            action=action,
                                                                            deletepagenum=pagenum)
    else:
        ## default - display details of submission pages for this submission:
        (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype, action=action)
    return (title, body)
def _configure_doctype_move_submission_page(doctype, action, pagenum, direction):
    """Move a submission page one position up or down by swapping it with its
       neighbouring page.
       @param doctype: (string) the unique ID of the document type.
       @param action: (string) the unique name/ID of the action.
       @param pagenum: (integer) the number of the page to move.
       @param direction: (string) "up" or "down" (case-insensitive).
       @return: a tuple containing 2 strings - the page title and the page body.
    """
    user_msg = []
    ## guard clause - reject anything other than "up"/"down":
    if direction.lower() not in ("up", "down"):
        user_msg.append("""Invalid Page destination - no action was taken""")
        (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                        action=action,
                                                                        user_msg=user_msg)
        return (title, body)
    ## determine the neighbouring page to swap with, then swap:
    if direction.lower() == "up":
        neighbour_page = pagenum - 1
    else:
        neighbour_page = pagenum + 1
    error_code = swap_elements_adjacent_pages_doctype_action(doctype=doctype, action=action,
                                                             page1=pagenum, page2=neighbour_page)
    ## translate the swap's error code into a message for the user:
    if error_code == 0:
        ## pages swapped successfully:
        ## TODO : LOG PAGE SWAP
        user_msg.append("""Page %s was successfully moved %swards""" % (pagenum, direction.capitalize()))
    elif error_code == 1:
        ## pages are not adjacent:
        user_msg.append("""Unable to move page - only adjacent pages can be swapped around""")
    elif error_code == 2:
        ## at least one page was outside the legal range of pages:
        user_msg.append("""Unable to move page to illegal position""")
    elif error_code in (3, 4):
        ## problem while moving the fields between the pages:
        ## TODO : LOG ERROR
        user_msg.append("""Error: there was a problem swapping the submission elements to their new pages.""")
        user_msg.append("""An attempt was made to return the elements to their original pages - you """\
                        """should verify that this was successful, or ask your administrator"""\
                        """ to fix the problem manually""")
    elif error_code == 5:
        ## elements from the first page were left stranded in the temporary page:
        ## TODO : LOG ERROR
        user_msg.append("""Error: there was a problem swapping the submission elements to their new pages.""")
        user_msg.append("""Some elements were left stranded on a temporary page. Please ask your administrator to"""\
                        """ fix this problem manually""")
    (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype, action=action, user_msg=user_msg)
    return (title, body)
def _configure_doctype_delete_submission_page(doctype, action, pagenum):
    """Delete a page (and all of the field elements it holds) from a
       submission, renumbering the pages above it to close the gap.
       @param doctype: (string) the unique ID of the document type.
       @param action: (string) the unique name/ID of the action.
       @param pagenum: (integer) the number of the page to delete.
       @return: a tuple containing 2 strings - the page title and the page body.
    """
    user_msg = []
    num_pages = get_numbersubmissionpages_doctype_action(doctype=doctype, action=action)
    if num_pages > 0:
        ## proceed with deletion
        ## step 1: remove every field element sitting on the page:
        error_code = delete_allfields_submissionpage_doctype_action(doctype=doctype, action=action, pagenum=pagenum)
        if error_code == 0:
            ## everything OK
            ## move elements from pages above the deleted page down by one page:
            decrement_by_one_pagenumber_submissionelements_abovepage(doctype=doctype, action=action, frompage=pagenum)
            ## now decrement the number of pages associated with the submission:
            error_code = decrement_by_one_number_submissionpages_doctype_action(doctype=doctype, action=action)
            if error_code == 0:
                ## successfully deleted submission page
                ## TODO : LOG DELETION
                user_msg.append("""Page number %s of Submission %s was successfully deleted."""\
                                % (pagenum, action))
                (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                                action=action,
                                                                                user_msg=user_msg)
            else:
                ## error - either submission didn't exist, or multiple instances found
                ## (the elements are already gone, so the page count is now inconsistent)
                ## TODO : LOG ERROR
                user_msg.append("""The Submission elements were deleted from Page %s of the Submission "%s"."""\
                                """ However, it was not possible to delete the page itself."""\
                                % (pagenum, action))
                (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                                action=action,
                                                                                user_msg=user_msg)
        else:
            ## unable to delete some or all fields from the page
            ## TODO : LOG ERROR
            user_msg.append("""Error: Unable to delete some field elements from Page %s of Submission %s%s - """\
                            """Page not deleted!""" % (pagenum, action, doctype))
            (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                            action=action,
                                                                            user_msg=user_msg)
    elif num_pages == 0:
        ## no pages to delete for this submission
        user_msg.append("""This Submission has no Pages - Cannot delete a Page!""")
        (title, body) = _create_configure_doctype_submission_pages_form(doctype=doctype,
                                                                        action=action,
                                                                        user_msg=user_msg)
    else:
        ## error - couldn't determine the number of pages for submission
        ## (presumably num_pages is None/negative on a DB-layer error - TODO confirm)
        ## TODO : LOG ERROR
        user_msg.append("""Unable to determine number of Submission Pages for Submission "%s" - """\
                        """Cannot delete page %s"""\
                        % (action, pagenum))
        (title, body) = _create_configure_doctype_form(doctype=doctype, user_msg=user_msg)
    return (title, body)
def _create_configure_doctype_submission_pages_form(doctype,
                                                    action,
                                                    deletepagenum="",
                                                    user_msg=""):
    """Perform the necessary steps in order to display a list of the pages
       belonging to a given submission of a given document type.
       @param doctype: (string) the unique ID of the document type.
       @param action: (string) the unique name/ID of the action.
       @param deletepagenum: (string/integer) page for which a delete
        confirmation is pending, if any.
       @param user_msg: (string, or list) any message(s) to be displayed to the user.
       @return: a tuple containing 2 strings - the page title and the page body.
    """
    title = """Details of the Pages of the "%s" Submission of the "%s" Document Type:""" % (action, doctype)
    ## fetch (creation-date, modification-date, number-of-pages) for the submission:
    submission_dets = get_cd_md_numbersubmissionpages_doctype_action(doctype=doctype, action=action)
    if submission_dets:
        first_row = submission_dets[0]
        cd = str(first_row[0])
        md = str(first_row[1])
        num_pages = first_row[2]
    else:
        ## submission details unavailable - fall back to empty dates, zero pages:
        cd = ""
        md = ""
        num_pages = "0"
    body = websubmitadmin_templates.tmpl_configuredoctype_list_submissionpages(doctype=doctype,
                                                                               action=action,
                                                                               number_pages=num_pages,
                                                                               cd=cd,
                                                                               md=md,
                                                                               deletepagenum=deletepagenum,
                                                                               user_msg=user_msg)
    return (title, body)
diff --git a/invenio/legacy/websubmit/engine.py b/invenio/legacy/websubmit/engine.py
index 9a5c73250..b7d85946e 100644
--- a/invenio/legacy/websubmit/engine.py
+++ b/invenio/legacy/websubmit/engine.py
@@ -1,1858 +1,1858 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebSubmit: the mechanism for the submission of new records into Invenio
via a Web interface.
"""
__revision__ = "$Id$"
## import interesting modules:
import string
import os
import sys
import time
import types
import re
import pprint
from urllib import quote_plus
from cgi import escape
from invenio.config import \
CFG_SITE_LANG, \
CFG_SITE_NAME, \
CFG_SITE_URL, \
CFG_WEBSUBMIT_STORAGEDIR, \
CFG_DEVEL_SITE, \
CFG_SITE_SECURE_URL, \
CFG_WEBSUBMIT_USE_MATHJAX
from invenio.dbquery import Error
from invenio.access_control_engine import acc_authorize_action
from invenio.webpage import page, error_page, warning_page
from invenio.webuser import getUid, get_email, collect_user_info, isGuestUser, \
page_not_authorized
from invenio.legacy.websubmit.config import CFG_RESERVED_SUBMISSION_FILENAMES, \
InvenioWebSubmitFunctionError, InvenioWebSubmitFunctionStop, \
InvenioWebSubmitFunctionWarning
from invenio.base.i18n import gettext_set_language, wash_language
from invenio.webstat import register_customevent
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.utils.url import make_canonical_urlargd, redirect_to_url
from invenio.websubmitadmin_engine import string_is_alphanumeric_including_underscore
from invenio.htmlutils import get_mathjax_header
from invenio.websubmit_dblayer import \
get_storage_directory_of_action, \
get_longname_of_doctype, \
get_longname_of_action, \
get_num_pages_of_submission, \
get_parameter_value_for_doctype, \
submission_exists_in_log, \
log_new_pending_submission, \
log_new_completed_submission, \
update_submission_modified_date_in_log, \
update_submission_reference_in_log, \
update_submission_reference_and_status_in_log, \
get_form_fields_on_submission_page, \
get_element_description, \
get_element_check_description, \
get_form_fields_not_on_submission_page, \
function_step_is_last, \
get_collection_children_of_submission_collection, \
get_submission_collection_name, \
get_doctype_children_of_submission_collection, \
get_categories_of_doctype, \
get_doctype_details, \
get_actions_on_submission_page_for_doctype, \
get_action_details, \
get_parameters_of_function, \
get_details_of_submission, \
get_functions_for_submission_step, \
get_submissions_at_level_X_with_score_above_N, \
submission_is_finished
import invenio.template
websubmit_templates = invenio.template.load('websubmit')
def interface(req,
c=CFG_SITE_NAME,
ln=CFG_SITE_LANG,
doctype="",
act="",
startPg=1,
access="",
mainmenu="",
fromdir="",
nextPg="",
nbPg="",
curpage=1):
"""This function is called after a user has visited a document type's
"homepage" and selected the type of "action" to perform. Having
clicked an action-button (e.g. "Submit a New Record"), this function
will be called . It performs the task of initialising a new submission
session (retrieving information about the submission, creating a
working submission-directory, etc), and "drawing" a submission page
containing the WebSubmit form that the user uses to input the metadata
to be submitted.
When a user moves between pages in the submission interface, this
function is recalled so that it can save the metadata entered into the
previous page by the user, and draw the current submission-page.
Note: During a submission, for each page refresh, this function will be
called while the variable "step" (a form variable, seen by
websubmit_webinterface, which calls this function) is 0 (ZERO).
In other words, this function handles the FRONT-END phase of a
submission, BEFORE the WebSubmit functions are called.
@param req: (apache request object) *** NOTE: Added into this object, is
a variable called "form" (req.form). This is added into the object in
the index function of websubmit_webinterface. It contains a
"mod_python.util.FieldStorage" instance, that contains the form-fields
found on the previous submission page.
@param c: (string), defaulted to CFG_SITE_NAME. The name of the Invenio
installation.
@param ln: (string), defaulted to CFG_SITE_LANG. The language in which to
display the pages.
@param doctype: (string) - the doctype ID of the doctype for which the
submission is being made.
@param act: (string) - The ID of the action being performed (e.g.
submission of bibliographic information; modification of bibliographic
information, etc).
@param startPg: (integer) - Starting page for the submission? Defaults
to 1.
@param indir: (string) - the directory used to store all submissions
of the given "type" of this submission. For example, if the submission
is of the type "modify bibliographic information", this variable would
contain "modify".
@param access: (string) - the "access" number for the submission
(e.g. 1174062451_7010). This number is also used as the name for the
current working submission directory.
@param mainmenu: (string) - contains the URL (minus the Invenio
home stem) for the submission's home-page. (E.g. If this submission
is "PICT", the "mainmenu" file would contain "/submit?doctype=PICT".
@param fromdir: (integer)
@param nextPg: (string)
@param nbPg: (string)
@param curpage: (integer) - the current submission page number. Defaults
to 1.
"""
ln = wash_language(ln)
# load the right message language
_ = gettext_set_language(ln)
sys.stdout = req
# get user ID:
user_info = collect_user_info(req)
uid = user_info['uid']
uid_email = user_info['email']
# variable initialisation
t = ""
field = []
fieldhtml = []
level = []
fullDesc = []
text = ''
check = []
select = []
radio = []
upload = []
txt = []
noPage = []
# Preliminary tasks
if not access:
# In some cases we want to take the users directly to the submit-form.
# This fix makes this possible - as it generates the required access
# parameter if it is not present.
pid = os.getpid()
now = time.time()
access = "%i_%s" % (now, pid)
# check we have minimum fields
if not doctype or not act or not access:
## We don't have all the necessary information to go ahead
## with this submission:
return warning_page(_("Not enough information to go ahead with the submission."), req, ln)
try:
assert(not access or re.match('\d+_\d+', access))
except AssertionError:
register_exception(req=req, prefix='doctype="%s", access="%s"' % (doctype, access))
return warning_page(_("Invalid parameters"), req, ln)
if doctype and act:
## Let's clean the input
details = get_details_of_submission(doctype, act)
if not details:
return warning_page(_("Invalid doctype and act parameters"), req, ln)
doctype = details[0]
act = details[1]
## Before continuing to display the submission form interface,
## verify that this submission has not already been completed:
if submission_is_finished(doctype, act, access, uid_email):
## This submission has already been completed.
## This situation can arise when, having completed a submission,
## the user uses the browser's back-button to go back to the form
## stage of the submission and then tries to submit once more.
## This is unsafe and should not be allowed. Instead of re-displaying
## the submission forms, display an error message to the user:
wrnmsg = """<b>This submission has been completed. Please go to the""" \
""" <a href="/submit?doctype=%(doctype)s&amp;ln=%(ln)s">""" \
"""main menu</a> to start a new submission.</b>""" \
% { 'doctype' : quote_plus(doctype), 'ln' : ln }
return warning_page(wrnmsg, req, ln)
## retrieve the action and doctype data:
## Concatenate action ID and doctype ID to make the submission ID:
subname = "%s%s" % (act, doctype)
## Get the submission storage directory from the DB:
submission_dir = get_storage_directory_of_action(act)
if submission_dir:
indir = submission_dir
else:
## Unable to determine the submission-directory:
return warning_page(_("Unable to find the submission directory for the action: %s") % escape(str(act)), req, ln)
## get the document type's long-name:
doctype_lname = get_longname_of_doctype(doctype)
if doctype_lname is not None:
## Got the doctype long-name: replace spaces with HTML chars:
docname = doctype_lname.replace(" ", "&nbsp;")
else:
## Unknown document type:
return warning_page(_("Unknown document type"), req, ln)
## get the action's long-name:
actname = get_longname_of_action(act)
if actname is None:
## Unknown action:
return warning_page(_("Unknown action"), req, ln)
## Get the number of pages for this submission:
num_submission_pages = get_num_pages_of_submission(subname)
if num_submission_pages is not None:
nbpages = num_submission_pages
else:
## Unable to determine the number of pages for this submission:
return warning_page(_("Unable to determine the number of submission pages."), req, ln)
## If unknown, get the current page of submission:
if startPg != "" and curpage in ("", 0):
curpage = startPg
## retrieve the name of the file in which the reference of
## the submitted document will be stored
rn_filename = get_parameter_value_for_doctype(doctype, "edsrn")
if rn_filename is not None:
edsrn = rn_filename
else:
## Unknown value for edsrn - set it to an empty string:
edsrn = ""
## This defines the path to the directory containing the action data
curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, indir, doctype, access)
try:
assert(curdir == os.path.abspath(curdir))
except AssertionError:
register_exception(req=req, prefix='indir="%s", doctype="%s", access="%s"' % (indir, doctype, access))
return warning_page(_("Invalid parameters"), req, ln)
## if this submission comes from another one (fromdir is then set)
## We retrieve the previous submission directory and put it in the proper one
if fromdir != "":
olddir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, fromdir, doctype, access)
try:
assert(olddir == os.path.abspath(olddir))
except AssertionError:
register_exception(req=req, prefix='fromdir="%s", doctype="%s", access="%s"' % (fromdir, doctype, access))
return warning_page(_("Invalid parameters"), req, ln)
if os.path.exists(olddir):
os.rename(olddir, curdir)
## If the submission directory still does not exist, we create it
if not os.path.exists(curdir):
try:
os.makedirs(curdir)
except Exception, e:
register_exception(req=req, alert_admin=True)
return warning_page(_("Unable to create a directory for this submission. The administrator has been alerted."), req, ln)
## Retrieve the previous page, as submitted to curdir (before we
## overwrite it with our curpage as declared from the incoming
## form)
try:
fp = open(os.path.join(curdir, "curpage"))
previous_page_from_disk = fp.read()
fp.close()
except:
previous_page_from_disk = "1"
# retrieve the original main menu url and save it in the "mainmenu" file
if mainmenu != "":
fp = open(os.path.join(curdir, "mainmenu"), "w")
fp.write(mainmenu)
fp.close()
# and if the file containing the URL to the main menu exists
# we retrieve it and store it in the $mainmenu variable
if os.path.exists(os.path.join(curdir, "mainmenu")):
fp = open(os.path.join(curdir, "mainmenu"), "r");
mainmenu = fp.read()
fp.close()
else:
mainmenu = "%s/submit" % (CFG_SITE_URL,)
# various authentication related tasks...
if uid_email != "guest" and uid_email != "":
#First save the username (email address) in the SuE file. This way bibconvert will be able to use it if needed
fp = open(os.path.join(curdir, "SuE"), "w")
fp.write(uid_email)
fp.close()
if os.path.exists(os.path.join(curdir, "combo%s" % doctype)):
fp = open(os.path.join(curdir, "combo%s" % doctype), "r");
categ = fp.read()
fp.close()
else:
categ = req.form.get('combo%s' % doctype, '*')
# is user authorized to perform this action?
(auth_code, auth_message) = acc_authorize_action(req, 'submit', \
authorized_if_no_roles=not isGuestUser(uid), \
verbose=0, \
doctype=doctype, \
act=act, \
categ=categ)
if not auth_code == 0:
return warning_page("""<center><font color="red">%s</font></center>""" % auth_message, req, ln)
## update the "journal of submission":
## Does the submission already exist in the log?
submission_exists = \
submission_exists_in_log(doctype, act, access, uid_email)
if submission_exists == 1:
## update the modification-date of this submission in the log:
update_submission_modified_date_in_log(doctype, act, access, uid_email)
else:
## Submission doesn't exist in log - create it:
log_new_pending_submission(doctype, act, access, uid_email)
## Let's write in curdir file under curdir the curdir value
## in case e.g. it is needed in FFT.
fp = open(os.path.join(curdir, "curdir"), "w")
fp.write(curdir)
fp.close()
## Let's write in ln file the current language
fp = open(os.path.join(curdir, "ln"), "w")
fp.write(ln)
fp.close()
# Save the form fields entered in the previous submission page
# If the form was sent with the GET method
form = dict(req.form)
value = ""
# we parse all the form variables
for key, formfields in form.items():
filename = key.replace("[]", "")
file_to_open = os.path.join(curdir, filename)
try:
assert(file_to_open == os.path.abspath(file_to_open))
except AssertionError:
register_exception(req=req, prefix='curdir="%s", filename="%s"' % (curdir, filename))
return warning_page(_("Invalid parameters"), req, ln)
# Do not write reserved filenames to disk
if filename in CFG_RESERVED_SUBMISSION_FILENAMES:
# Unless there is really an element with that name on this
# page or previous one (either visited, or declared to be
# visited), which means that admin authorized it.
if not ((str(curpage).isdigit() and \
filename in [submission_field[3] for submission_field in \
get_form_fields_on_submission_page(subname, curpage)]) or \
(str(curpage).isdigit() and int(curpage) > 1 and \
filename in [submission_field[3] for submission_field in \
get_form_fields_on_submission_page(subname, int(curpage) - 1)]) or \
(previous_page_from_disk.isdigit() and \
filename in [submission_field[3] for submission_field in \
get_form_fields_on_submission_page(subname, int(previous_page_from_disk))])):
# Still this will filter out reserved field names that
# might have been called by functions such as
# Create_Modify_Interface function in MBI step, or
# dynamic fields in response elements, but that is
# unlikely to be a problem.
continue
# Skip variables containing characters that are not allowed in
# WebSubmit elements
if not string_is_alphanumeric_including_underscore(filename):
continue
# the field is an array
if isinstance(formfields, types.ListType):
fp = open(file_to_open, "w")
for formfield in formfields:
#stripslashes(value)
value = specialchars(formfield)
fp.write(value+"\n")
fp.close()
# the field is a normal string
elif isinstance(formfields, types.StringTypes) and formfields != "":
value = formfields
fp = open(file_to_open, "w")
fp.write(specialchars(value))
fp.close()
# the field is a file
elif hasattr(formfields,"filename") and formfields.filename:
dir_to_open = os.path.join(curdir, 'files', key)
try:
assert(dir_to_open == os.path.abspath(dir_to_open))
assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
except AssertionError:
register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key))
return warning_page(_("Invalid parameters"), req, ln)
if not os.path.exists(dir_to_open):
try:
os.makedirs(dir_to_open)
except:
register_exception(req=req, alert_admin=True)
return warning_page(_("Cannot create submission directory. The administrator has been alerted."), req, ln)
filename = formfields.filename
## Before saving the file to disc, wash the filename (in particular
## washing away UNIX and Windows (e.g. DFS) paths):
filename = os.path.basename(filename.split('\\')[-1])
filename = filename.strip()
if filename != "":
fp = open(os.path.join(dir_to_open, filename), "w")
while True:
buf = formfields.read(10240)
if buf:
fp.write(buf)
else:
break
fp.close()
fp = open(os.path.join(curdir, "lastuploadedfile"), "w")
fp.write(filename)
fp.close()
fp = open(file_to_open, "w")
fp.write(filename)
fp.close()
else:
return warning_page(_("No file uploaded?"), req, ln)
## if the found field is the reference of the document,
## save this value in the "journal of submissions":
if uid_email != "" and uid_email != "guest":
if key == edsrn:
update_submission_reference_in_log(doctype, access, uid_email, value)
## create the interface:
subname = "%s%s" % (act, doctype)
## Get all of the form fields that appear on this page, ordered by fieldnum:
form_fields = get_form_fields_on_submission_page(subname, curpage)
full_fields = []
values = []
the_globals = {
'doctype' : doctype,
'action' : action,
'access' : access,
'ln' : ln,
'curdir' : curdir,
'uid' : uid,
'uid_email' : uid_email,
'form' : form,
'act' : act,
'action' : act, ## for backward compatibility
'req' : req,
'user_info' : user_info,
'InvenioWebSubmitFunctionError' : InvenioWebSubmitFunctionError,
'__websubmit_in_jail__' : True,
'__builtins__' : globals()['__builtins__']
}
for field_instance in form_fields:
full_field = {}
## Retrieve the field's description:
element_descr = get_element_description(field_instance[3])
try:
assert(element_descr is not None)
except AssertionError:
msg = _("Unknown form field found on submission page.")
register_exception(req=req, alert_admin=True, prefix=msg)
## The form field doesn't seem to exist - return with error message:
return warning_page(_("Unknown form field found on submission page."), req, ln)
if element_descr[8] is None:
val = ""
else:
val = element_descr[8]
## we also retrieve and add the javascript code of the checking function, if needed
## Set it to empty string to begin with:
full_field['javascript'] = ''
if field_instance[7] != '':
check_descr = get_element_check_description(field_instance[7])
if check_descr is not None:
## Retrieved the check description:
full_field['javascript'] = check_descr
full_field['type'] = element_descr[3]
full_field['name'] = field_instance[3]
full_field['rows'] = element_descr[5]
full_field['cols'] = element_descr[6]
full_field['val'] = val
full_field['size'] = element_descr[4]
full_field['maxlength'] = element_descr[7]
full_field['htmlcode'] = element_descr[9]
full_field['typename'] = field_instance[1] ## TODO: Investigate this, Not used?
## It also seems to refer to pagenum.
# The 'R' fields must be executed in the engine's environment,
# as the runtime functions access some global and local
# variables.
if full_field ['type'] == 'R':
try:
co = compile (full_field ['htmlcode'].replace("\r\n","\n"), "<string>", "exec")
the_globals['text'] = ''
exec co in the_globals
text = the_globals['text']
except:
register_exception(req=req, alert_admin=True, prefix="Error in evaluating response element %s with globals %s" % (pprint.pformat(full_field), pprint.pformat(the_globals)))
raise
else:
text = websubmit_templates.tmpl_submit_field (ln = ln, field = full_field)
# we now determine the exact type of the created field
if full_field['type'] not in [ 'D','R']:
field.append(full_field['name'])
level.append(field_instance[5])
fullDesc.append(field_instance[4])
txt.append(field_instance[6])
check.append(field_instance[7])
# If the field is not user-defined, we try to determine its type
# (select, radio, file upload...)
# check whether it is a select field or not
if re.search("SELECT", text, re.IGNORECASE) is not None:
select.append(1)
else:
select.append(0)
# checks whether it is a radio field or not
if re.search(r"TYPE=[\"']?radio", text, re.IGNORECASE) is not None:
radio.append(1)
else:
radio.append(0)
# checks whether it is a file upload or not
if re.search(r"TYPE=[\"']?file", text, re.IGNORECASE) is not None:
upload.append(1)
else:
upload.append(0)
# if the field description contains the "<COMBO>" string, replace
# it by the category selected on the document page submission page
combofile = "combo%s" % doctype
if os.path.exists("%s/%s" % (curdir, combofile)):
f = open("%s/%s" % (curdir, combofile), "r")
combo = f.read()
f.close()
else:
combo = ""
text = text.replace("<COMBO>", combo)
# if there is a <YYYY> tag in it, replace it by the current year
year = time.strftime("%Y");
text = text.replace("<YYYY>", year)
# if there is a <TODAY> tag in it, replace it by the current year
today = time.strftime("%d/%m/%Y");
text = text.replace("<TODAY>", today)
fieldhtml.append(text)
else:
select.append(0)
radio.append(0)
upload.append(0)
# field.append(value) - initial version, not working with JS, taking a submitted value
field.append(field_instance[3])
level.append(field_instance[5])
txt.append(field_instance[6])
fullDesc.append(field_instance[4])
check.append(field_instance[7])
fieldhtml.append(text)
full_field['fullDesc'] = field_instance[4]
full_field['text'] = text
# If a file exists with the name of the field we extract the saved value
text = ''
if os.path.exists(os.path.join(curdir, full_field['name'])):
file = open(os.path.join(curdir, full_field['name']), "r");
text = file.read()
text = re.compile("[\n\r]*$").sub("", text)
text = re.compile("\n").sub("\\n", text)
text = re.compile("\r").sub("", text)
file.close()
values.append(text)
full_fields.append(full_field)
returnto = {}
if int(curpage) == int(nbpages):
subname = "%s%s" % (act, doctype)
other_form_fields = \
get_form_fields_not_on_submission_page(subname, curpage)
nbFields = 0
message = ""
fullcheck_select = []
fullcheck_radio = []
fullcheck_upload = []
fullcheck_field = []
fullcheck_level = []
fullcheck_txt = []
fullcheck_noPage = []
fullcheck_check = []
for field_instance in other_form_fields:
if field_instance[5] == "M":
## If this field is mandatory, get its description:
element_descr = get_element_description(field_instance[3])
try:
assert(element_descr is not None)
except AssertionError:
msg = _("Unknown form field found on submission page.")
register_exception(req=req, alert_admin=True, prefix=msg)
## The form field doesn't seem to exist - return with error message:
return warning_page(_("Unknown form field found on submission page."), req, ln)
if element_descr[3] in ['D', 'R']:
if element_descr[3] == "D":
text = element_descr[9]
else:
text = eval(element_descr[9])
formfields = text.split(">")
for formfield in formfields:
match = re.match("name=([^ <>]+)", formfield, re.IGNORECASE)
if match is not None:
names = match.groups
for value in names:
if value != "":
value = re.compile("[\"']+").sub("", value)
fullcheck_field.append(value)
fullcheck_level.append(field_instance[5])
fullcheck_txt.append(field_instance[6])
fullcheck_noPage.append(field_instance[1])
fullcheck_check.append(field_instance[7])
nbFields = nbFields + 1
else:
fullcheck_noPage.append(field_instance[1])
fullcheck_field.append(field_instance[3])
fullcheck_level.append(field_instance[5])
fullcheck_txt.append(field_instance[6])
fullcheck_check.append(field_instance[7])
nbFields = nbFields+1
# tests each mandatory field
fld = 0
res = 1
for i in xrange(nbFields):
res = 1
if not os.path.exists(os.path.join(curdir, fullcheck_field[i])):
res = 0
else:
file = open(os.path.join(curdir, fullcheck_field[i]), "r")
text = file.read()
if text == '':
res = 0
else:
if text == "Select:":
res = 0
if res == 0:
fld = i
break
if not res:
returnto = {
'field' : fullcheck_txt[fld],
'page' : fullcheck_noPage[fld],
}
t += websubmit_templates.tmpl_page_interface(
ln = ln,
docname = docname,
actname = actname,
curpage = curpage,
nbpages = nbpages,
nextPg = nextPg,
access = access,
nbPg = nbPg,
doctype = doctype,
act = act,
fields = full_fields,
javascript = websubmit_templates.tmpl_page_interface_js(
ln = ln,
upload = upload,
field = field,
fieldhtml = fieldhtml,
txt = txt,
check = check,
level = level,
curdir = curdir,
values = values,
select = select,
radio = radio,
curpage = curpage,
nbpages = nbpages,
returnto = returnto,
),
mainmenu = mainmenu,
)
t += websubmit_templates.tmpl_page_do_not_leave_submission_js(ln)
# start display:
req.content_type = "text/html"
req.send_http_header()
p_navtrail = """<a href="/submit?ln=%(ln)s" class="navtrail">%(submit)s</a>&nbsp;>&nbsp;<a href="/submit?doctype=%(doctype)s&amp;ln=%(ln)s" class="navtrail">%(docname)s</a>&nbsp;""" % {
'submit' : _("Submit"),
'doctype' : quote_plus(doctype),
'docname' : docname,
'ln' : ln
}
## add MathJax if wanted
if CFG_WEBSUBMIT_USE_MATHJAX:
metaheaderadd = get_mathjax_header(req.is_https())
metaheaderadd += websubmit_templates.tmpl_mathpreview_header(ln, req.is_https())
else:
metaheaderadd = ''
return page(title= actname,
body = t,
navtrail = p_navtrail,
description = "submit documents",
keywords = "submit",
uid = uid,
language = ln,
req = req,
navmenuid='submit',
metaheaderadd=metaheaderadd)
def endaction(req,
              c=CFG_SITE_NAME,
              ln=CFG_SITE_LANG,
              doctype="",
              act="",
              startPg=1,
              access="",
              mainmenu="",
              fromdir="",
              nextPg="",
              nbPg="",
              curpage=1,
              step=1,
              mode="U"):
    """Having filled-in the WebSubmit form created for metadata by the interface
       function, the user clicks a button to either "finish the submission" or
       to "proceed" to the next stage of the submission. At this point, a
       variable called "step" will be given a value of 1 or above, which means
       that this function is called by websubmit_webinterface.
       So, during all non-zero steps of the submission, this function is called.

       In other words, this function is called during the BACK-END phase of a
       submission, in which WebSubmit *functions* are being called.

       The function first ensures that all of the WebSubmit form field values
       have been saved in the current working submission directory, in text-
       files with the same name as the field elements have. It then determines
       the functions to be called for the given step of the submission, and
       executes them.
       Following this, if this is the last step of the submission, it logs the
       submission as "finished" in the journal of submissions.

       @param req: (apache request object) *** NOTE: Added into this object, is
        a variable called "form" (req.form). This is added into the object in
        the index function of websubmit_webinterface. It contains a
        "mod_python.util.FieldStorage" instance, that contains the form-fields
        found on the previous submission page.
       @param c: (string), defaulted to CFG_SITE_NAME. The name of the Invenio
        installation.
       @param ln: (string), defaulted to CFG_SITE_LANG. The language in which to
        display the pages.
       @param doctype: (string) - the doctype ID of the doctype for which the
        submission is being made.
       @param act: (string) - The ID of the action being performed (e.g.
        submission of bibliographic information; modification of bibliographic
        information, etc).
       @param startPg: (integer) - Starting page for the submission? Defaults
        to 1.
       @param access: (string) - the "access" number for the submission
        (e.g. 1174062451_7010). This number is also used as the name for the
        current working submission directory.
       @param mainmenu: (string) - contains the URL (minus the Invenio
        home stem) for the submission's home-page. (E.g. If this submission
        is "PICT", the "mainmenu" file would contain "/submit?doctype=PICT".
       @param fromdir: (unused here) - historically the directory used to
        store all submissions of the given "type" of this submission; the
        actual storage directory ("indir") is re-read from the DB below.
       @param nextPg: navigation value passed through to the template.
       @param nbPg: navigation value passed through to the template.
       @param curpage: (integer) - the current submission page number. Defaults
        to 1.
       @param step: (integer) - the current step of the submission. Defaults to
        1.
       @param mode: (string) - display mode, passed to the functions as
        "dismode". Defaults to "U".
       @return: (string) - the rendered HTML page, or a warning page on error.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    dismode = mode
    ln = wash_language(ln)
    # redirect bare prints from legacy WebSubmit functions to the response
    sys.stdout = req
    rn = ""
    t = ""
    # get user ID:
    uid = getUid(req)
    uid_email = get_email(uid)
    ## Get the submission storage directory from the DB:
    submission_dir = get_storage_directory_of_action(act)
    if submission_dir:
        indir = submission_dir
    else:
        ## Unable to determine the submission-directory:
        return warning_page(_("Unable to find the submission directory for the action: %s") % escape(str(act)), req, ln)
    curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, indir, doctype, access)
    # The category is read from the on-disk "combo<doctype>" file if present,
    # otherwise from the submitted form (default "*" = any category):
    if os.path.exists(os.path.join(curdir, "combo%s" % doctype)):
        fp = open(os.path.join(curdir, "combo%s" % doctype), "r");
        categ = fp.read()
        fp.close()
    else:
        categ = req.form.get('combo%s' % doctype, '*')
    # is user authorized to perform this action?
    (auth_code, auth_message) = acc_authorize_action(req, 'submit', \
                                                    authorized_if_no_roles=not isGuestUser(uid), \
                                                    verbose=0, \
                                                    doctype=doctype, \
                                                    act=act, \
                                                    categ=categ)
    if not auth_code == 0:
        return warning_page("""<center><font color="red">%s</font></center>""" % auth_message, req, ln)
    # Preliminary tasks
    ## check we have minimum fields
    if not doctype or not act or not access:
        ## We don't have all the necessary information to go ahead
        ## with this submission:
        return warning_page(_("Not enough information to go ahead with the submission."), req, ln)
    if doctype and act:
        ## Let's clean the input by replacing it with the canonical
        ## (DB-cased) doctype/act values:
        details = get_details_of_submission(doctype, act)
        if not details:
            return warning_page(_("Invalid doctype and act parameters"), req, ln)
        doctype = details[0]
        act = details[1]
    # the access token must look like "<digits>_<digits>" (or be empty):
    try:
        assert(not access or re.match('\d+_\d+', access))
    except AssertionError:
        register_exception(req=req, prefix='doctype="%s", access="%s"' % (doctype, access))
        return warning_page(_("Invalid parameters"), req, ln)
    ## Before continuing to process the submitted data, verify that
    ## this submission has not already been completed:
    if submission_is_finished(doctype, act, access, uid_email):
        ## This submission has already been completed.
        ## This situation can arise when, having completed a submission,
        ## the user uses the browser's back-button to go back to the form
        ## stage of the submission and then tries to submit once more.
        ## This is unsafe and should not be allowed. Instead of re-processing
        ## the submitted data, display an error message to the user:
        wrnmsg = """<b>This submission has been completed. Please go to the""" \
                 """ <a href="/submit?doctype=%(doctype)s&amp;ln=%(ln)s">""" \
                 """main menu</a> to start a new submission.</b>""" \
                 % { 'doctype' : quote_plus(doctype), 'ln' : ln }
        return warning_page(wrnmsg, req, ln)
    ## Get the number of pages for this submission:
    subname = "%s%s" % (act, doctype)
    ## retrieve the action and doctype data
    ## Get the submission storage directory from the DB:
    submission_dir = get_storage_directory_of_action(act)
    if submission_dir:
        indir = submission_dir
    else:
        ## Unable to determine the submission-directory:
        return warning_page(_("Unable to find the submission directory for the action: %s") % escape(str(act)), req, ln)
    # The following words are reserved and should not be used as field names
    reserved_words = ["stop", "file", "nextPg", "startPg", "access", "curpage", "nbPg", "act", \
                      "indir", "doctype", "mode", "step", "deleted", "file_path", "userfile_name"]
    # This defines the path to the directory containing the action data
    curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, indir, doctype, access)
    # guard against path-traversal in indir/doctype/access:
    try:
        assert(curdir == os.path.abspath(curdir))
    except AssertionError:
        register_exception(req=req, prefix='indir="%s", doctype=%s, access=%s' % (indir, doctype, access))
        return warning_page(_("Invalid parameters"), req, ln)
    ## If the submission directory still does not exist, we create it
    if not os.path.exists(curdir):
        try:
            os.makedirs(curdir)
        except Exception, e:
            register_exception(req=req, alert_admin=True)
            return warning_page(_("Unable to create a directory for this submission. The administrator has been alerted."), req, ln)
    # retrieve the original main menu url ans save it in the "mainmenu" file
    if mainmenu != "":
        fp = open(os.path.join(curdir, "mainmenu"), "w")
        fp.write(mainmenu)
        fp.close()
    # and if the file containing the URL to the main menu exists
    # we retrieve it and store it in the $mainmenu variable
    if os.path.exists(os.path.join(curdir, "mainmenu")):
        fp = open(os.path.join(curdir, "mainmenu"), "r");
        mainmenu = fp.read()
        fp.close()
    else:
        mainmenu = "%s/submit" % (CFG_SITE_URL,)
    num_submission_pages = get_num_pages_of_submission(subname)
    if num_submission_pages is not None:
        nbpages = num_submission_pages
    else:
        ## Unable to determine the number of pages for this submission:
        return warning_page(_("Unable to determine the number of submission pages."), \
                            req, ln)
    ## Retrieve the previous page, as submitted to curdir (before we
    ## overwrite it with our curpage as declared from the incoming
    ## form)
    try:
        fp = open(os.path.join(curdir, "curpage"))
        previous_page_from_disk = fp.read()
        fp.close()
    except:
        # no "curpage" file yet - assume we came from the last page:
        previous_page_from_disk = str(num_submission_pages)
    ## retrieve the name of the file in which the reference of
    ## the submitted document will be stored
    rn_filename = get_parameter_value_for_doctype(doctype, "edsrn")
    if rn_filename is not None:
        edsrn = rn_filename
    else:
        ## Unknown value for edsrn - set it to an empty string:
        edsrn = ""
    ## Determine whether the action is finished
    ## (ie there are no other steps after the current one):
    finished = function_step_is_last(doctype, act, step)
    ## Let's write in curdir file under curdir the curdir value
    ## in case e.g. it is needed in FFT.
    fp = open(os.path.join(curdir, "curdir"), "w")
    fp.write(curdir)
    fp.close()
    ## Let's write in ln file the current language
    fp = open(os.path.join(curdir, "ln"), "w")
    fp.write(ln)
    fp.close()
    # Save the form fields entered in the previous submission page
    # If the form was sent with the GET method
    form = req.form
    value = ""
    # we parse all the form variables
    for key in form.keys():
        formfields = form[key]
        # strip the "[]" suffix used by multi-valued (array) form fields:
        filename = key.replace("[]", "")
        file_to_open = os.path.join(curdir, filename)
        # guard against path-traversal via the field name:
        try:
            assert(file_to_open == os.path.abspath(file_to_open))
            assert(file_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
        except AssertionError:
            register_exception(req=req, prefix='curdir="%s", filename="%s"' % (curdir, filename))
            return warning_page(_("Invalid parameters"), req, ln)
        # Do not write reserved filenames to disk
        if filename in CFG_RESERVED_SUBMISSION_FILENAMES:
            # Unless there is really an element with that name on this
            # page, or on the previously visited one, which means that
            # admin authorized it. Note that in endaction() curpage is
            # equivalent to the "previous" page value
            if not ((previous_page_from_disk.isdigit() and \
                     filename in [submission_field[3] for submission_field in \
                                  get_form_fields_on_submission_page(subname, int(previous_page_from_disk))]) or \
                    (str(curpage).isdigit() and int(curpage) > 1 and \
                     filename in [submission_field[3] for submission_field in \
                                  get_form_fields_on_submission_page(subname, int(curpage) - 1)])):
                # might have been called by functions such as
                # Create_Modify_Interface function in MBI step, or
                # dynamic fields in response elements, but that is
                # unlikely to be a problem.
                continue
        # Skip variables containing characters that are not allowed in
        # WebSubmit elements
        if not string_is_alphanumeric_including_underscore(filename):
            continue
        # the field is an array
        if isinstance(formfields, types.ListType):
            fp = open(file_to_open, "w")
            for formfield in formfields:
                #stripslashes(value)
                value = specialchars(formfield)
                fp.write(value+"\n")
            fp.close()
        # the field is a normal string
        elif isinstance(formfields, types.StringTypes) and formfields != "":
            value = formfields
            fp = open(file_to_open, "w")
            fp.write(specialchars(value))
            fp.close()
        # the field is a file
        elif hasattr(formfields, "filename") and formfields.filename:
            dir_to_open = os.path.join(curdir, 'files', key)
            # guard against path-traversal via the upload field name:
            try:
                assert(dir_to_open == os.path.abspath(dir_to_open))
                assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
            except AssertionError:
                register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key))
                return warning_page(_("Invalid parameters"), req, ln)
            if not os.path.exists(dir_to_open):
                try:
                    os.makedirs(dir_to_open)
                except:
                    register_exception(req=req, alert_admin=True)
                    return warning_page(_("Cannot create submission directory. The administrator has been alerted."), req, ln)
            filename = formfields.filename
            ## Before saving the file to disc, wash the filename (in particular
            ## washing away UNIX and Windows (e.g. DFS) paths):
            filename = os.path.basename(filename.split('\\')[-1])
            filename = filename.strip()
            if filename != "":
                # copy the uploaded file in 10KB chunks:
                fp = open(os.path.join(dir_to_open, filename), "w")
                while True:
                    buf = formfields.file.read(10240)
                    if buf:
                        fp.write(buf)
                    else:
                        break
                fp.close()
                # record both the name of the last uploaded file and
                # the filename under the field's own name:
                fp = open(os.path.join(curdir, "lastuploadedfile"), "w")
                fp.write(filename)
                fp.close()
                fp = open(file_to_open, "w")
                fp.write(filename)
                fp.close()
            else:
                return warning_page(_("No file uploaded?"), req, ln)
        ## if the found field is the reference of the document
        ## we save this value in the "journal of submissions"
        if uid_email != "" and uid_email != "guest":
            if key == edsrn:
                update_submission_reference_in_log(doctype, access, uid_email, value)
    ## get the document type's long-name:
    doctype_lname = get_longname_of_doctype(doctype)
    if doctype_lname is not None:
        ## Got the doctype long-name: replace spaces with HTML chars:
        docname = doctype_lname.replace(" ", "&nbsp;")
    else:
        ## Unknown document type:
        return warning_page(_("Unknown document type"), req, ln)
    ## get the action's long-name:
    actname = get_longname_of_action(act)
    if actname is None:
        ## Unknown action:
        return warning_page(_("Unknown action"), req, ln)
    ## Determine whether the action is finished
    ## (ie there are no other steps after the current one):
    last_step = function_step_is_last(doctype, act, step)
    next_action = '' ## The next action to be proposed to the user
    # Prints the action details, returning the mandatory score
    action_score = action_details(doctype, act)
    current_level = get_level(doctype, act)
    # Calls all the function's actions
    function_content = ''
    try:
        ## Handle the execution of the functions for this
        ## submission/step:
        start_time = time.time()
        (function_content, last_step, action_score, rn) = \
            print_function_calls(req=req,
                                 doctype=doctype,
                                 action=act,
                                 step=step,
                                 form=form,
                                 start_time=start_time,
                                 access=access,
                                 curdir=curdir,
                                 dismode=mode,
                                 rn=rn,
                                 last_step=last_step,
                                 action_score=action_score,
                                 ln=ln)
    except InvenioWebSubmitFunctionError, e:
        register_exception(req=req, alert_admin=True, prefix='doctype="%s", action="%s", step="%s", form="%s", start_time="%s"' % (doctype, act, step, form, start_time))
        ## There was a serious function-error. Execution ends.
        if CFG_DEVEL_SITE:
            raise
        else:
            return warning_page(_("A serious function-error has been encountered. Adminstrators have been alerted. <br /><em>Please not that this might be due to wrong characters inserted into the form</em> (e.g. by copy and pasting some text from a PDF file)."), req, ln)
    except InvenioWebSubmitFunctionStop, e:
        ## For one reason or another, one of the functions has determined that
        ## the data-processing phase (i.e. the functions execution) should be
        ## halted and the user should be returned to the form interface once
        ## more. (NOTE: Redirecting the user to the Web-form interface is
        ## currently done using JavaScript. The "InvenioWebSubmitFunctionStop"
        ## exception contains a "value" string, which is effectively JavaScript
        ## - probably an alert box and a form that is submitted). **THIS WILL
        ## CHANGE IN THE FUTURE WHEN JavaScript IS REMOVED!**
        if e.value is not None:
            function_content = e.value
        else:
            function_content = e
    else:
        ## No function exceptions (InvenioWebSubmitFunctionStop,
        ## InvenioWebSubmitFunctionError) were raised by the functions. Propose
        ## the next action (if applicable), and log the submission as finished:
        ## If the action was mandatory we propose the next
        ## mandatory action (if any)
        if action_score != -1 and last_step == 1:
            next_action = Propose_Next_Action(doctype, \
                                              action_score, \
                                              access, \
                                              current_level, \
                                              indir)
        ## If we are in the last step of an action, we can update
        ## the "journal of submissions"
        if last_step == 1:
            if uid_email != "" and uid_email != "guest":
                ## update the "journal of submission":
                ## Does the submission already exist in the log?
                submission_exists = \
                    submission_exists_in_log(doctype, act, access, uid_email)
                if submission_exists == 1:
                    ## update the rn and status to finished for this submission
                    ## in the log:
                    update_submission_reference_and_status_in_log(doctype, \
                                                                  act, \
                                                                  access, \
                                                                  uid_email, \
                                                                  rn, \
                                                                  "finished")
                else:
                    ## Submission doesn't exist in log - create it:
                    log_new_completed_submission(doctype, \
                                                 act, \
                                                 access, \
                                                 uid_email, \
                                                 rn)
    ## Having executed the functions, create the page that will be displayed
    ## to the user:
    t = websubmit_templates.tmpl_page_endaction(
          ln = ln,
          # these fields are necessary for the navigation
          nextPg = nextPg,
          startPg = startPg,
          access = access,
          curpage = curpage,
          nbPg = nbPg,
          nbpages = nbpages,
          doctype = doctype,
          act = act,
          docname = docname,
          actname = actname,
          mainmenu = mainmenu,
          finished = finished,
          function_content = function_content,
          next_action = next_action,
        )
    if finished:
        # register event in webstat
        try:
            register_customevent("websubmissions", [get_longname_of_doctype(doctype)])
        except:
            register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")
    else:
        t += websubmit_templates.tmpl_page_do_not_leave_submission_js(ln)
    # start display:
    req.content_type = "text/html"
    req.send_http_header()
    p_navtrail = '<a href="/submit?ln='+ln+'" class="navtrail">' + _("Submit") +\
                 """</a>&nbsp;>&nbsp;<a href="/submit?doctype=%(doctype)s&amp;ln=%(ln)s" class="navtrail">%(docname)s</a>""" % {
                   'doctype' : quote_plus(doctype),
                   'docname' : docname,
                   'ln' : ln,
                 }
    ## add MathJax if wanted
    if CFG_WEBSUBMIT_USE_MATHJAX:
        metaheaderadd = get_mathjax_header(req.is_https())
        metaheaderadd += websubmit_templates.tmpl_mathpreview_header(ln, req.is_https())
    else:
        metaheaderadd = ''
    return page(title= actname,
                body = t,
                navtrail = p_navtrail,
                description="submit documents",
                keywords="submit",
                uid = uid,
                language = ln,
                req = req,
                navmenuid='submit',
                metaheaderadd=metaheaderadd)
def home(req, catalogues_text, c=CFG_SITE_NAME, ln=CFG_SITE_LANG):
"""This function generates the WebSubmit "home page".
Basically, this page contains a list of submission-collections
in WebSubmit, and gives links to the various document-type
submissions.
Document-types only appear on this page when they have been
connected to a submission-collection in WebSubmit.
@param req: (apache request object)
@param catalogues_text (string): the computed catalogues tree
@param c: (string) - defaults to CFG_SITE_NAME
@param ln: (string) - The Invenio interface language of choice.
Defaults to CFG_SITE_LANG (the default language of the installation).
@return: (string) - the Web page to be displayed.
"""
ln = wash_language(ln)
# get user ID:
try:
uid = getUid(req)
user_info = collect_user_info(req)
except Error, e:
return error_page(e, req, ln)
# load the right message language
_ = gettext_set_language(ln)
finaltext = websubmit_templates.tmpl_submit_home_page(
ln = ln,
catalogues = catalogues_text,
user_info = user_info,
)
return page(title=_("Submit"),
body=finaltext,
navtrail=[],
description="submit documents",
keywords="submit",
uid=uid,
language=ln,
req=req,
navmenuid='submit'
)
def makeCataloguesTable(req, ln=CFG_SITE_LANG):
    """Build the 'catalogues' (submission-collections) tree for the
       WebSubmit home-page. This tree contains the links to the various
       document types in WebSubmit.

       @param req: (dict) - the user request object, used to decide
        whether to display a submission.
       @param ln: (string) - the language of the interface
        (defaults to 'CFG_SITE_LANG').
       @return: (string, bool, bool) - the submission-collections tree;
        True if at least one submission is authorized for the user;
        True if at least one submission exists at all.
    """
    def _any_authorized(branches):
        # A branch grants access if it directly exposes a doctype, or if
        # any of its sub-branches does (recursively).
        return any(branch['docs'] or _any_authorized(branch['sons'])
                   for branch in branches)

    ## Fetch the submission-collections attached at the top level
    ## of the submission-collection tree:
    top_level_collctns = get_collection_children_of_submission_collection(0)
    if not top_level_collctns:
        ## Nothing is attached at the top level - show the "no content" page:
        empty_text = websubmit_templates.tmpl_submit_home_catalog_no_content(ln=ln)
        return empty_text, False, False
    ## Build the full branch for each top-level collection and render it:
    catalogues = [getCatalogueBranch(collctn_row[0], 1, req)
                  for collctn_row in top_level_collctns]
    tree_text = websubmit_templates.tmpl_submit_home_catalogs(
        ln=ln,
        catalogs=catalogues)
    return tree_text, _any_authorized(catalogues), True
def getCatalogueBranch(id_father, level, req):
    """Build up a given branch of the submission-collection tree.
       I.e. given a parent submission-collection ID, build up the tree
       below it: its doctype-children as well as its child submission-
       collections and their own children, recursively.

       @param id_father: (integer) - the ID of the submission-collection
        from which to begin building the branch.
       @param level: (integer) - the level of the current submission-
        collection branch.
       @param req: (dict) - the user request object, used to decide
        whether to display a submission.
       @return: (dictionary) - the branch and its sub-branches.
    """
    ## Resolve the collection name; unknown collections get an empty name:
    collctn_name = get_submission_collection_name(id_father)
    branch = {
        'name': collctn_name if collctn_name is not None else "",
        'id': id_father,
        'level': level,
    }
    user_info = collect_user_info(req)
    is_guest = isGuestUser(user_info['uid'])
    ## Doctype-children of this collection. Access to a submission pipeline
    ## for a logged-in user is decided by any authorization; if none are
    ## defined for the action, a logged-in user gets access. A guest needs
    ## an explicit rule allowing the action.
    branch['docs'] = [
        getDoctypeBranch(doctype_row[0])
        for doctype_row in get_doctype_children_of_submission_collection(id_father)
        if acc_authorize_action(req, 'submit',
                                authorized_if_no_roles=not is_guest,
                                doctype=doctype_row[0])[0] == 0
    ]
    ## Recurse into the collection-children of this collection:
    branch['sons'] = [
        getCatalogueBranch(collctn_row[0], level + 1, req)
        for collctn_row in get_collection_children_of_submission_collection(id_father)
    ]
    ## Return this branch of the built-up 'collection-tree':
    return branch
def getDoctypeBranch(doctype):
    """Create a document-type 'leaf-node' for the submission-collections
       tree: a dictionary carrying the ID and (long) name of the
       document-type submission it links to.

       @param doctype: (string) - the ID of the document type.
       @return: (dictionary) - with keys:
        + id: (string) - the document-type ID.
        + name: (string) - the (long) name of the document-type, or
          "Unknown Document Type" when the doctype cannot be resolved.
    """
    ldocname = get_longname_of_doctype(doctype)
    return {
        'id': doctype,
        'name': "Unknown Document Type" if ldocname is None else ldocname,
    }
def displayCatalogueBranch(id_father, level, catalogues):
    """Render one submission-collection branch as nested HTML list items,
       recursing into child collections and doctype leaves.

       @param id_father: (integer) - ID of the submission-collection to render.
       @param level: (integer) - depth of this branch (1 = top level, which
        is rendered emphasized; deeper levels are plain list items).
       @param catalogues: (list) - accumulator of visited collection IDs;
        child-collection IDs are appended as they are rendered.
       @return: (string) - the HTML for this branch.
    """
    collctn_name = get_submission_collection_name(id_father)
    if collctn_name is None:
        ## Unknown submission-collection in the DB - use a placeholder
        ## name to avoid errors:
        collctn_name = "Unknown Submission-Collection"
    ## Heading for this collection: top level gets emphasis, deeper levels
    ## are plain items (levels below 1 produce no heading at all):
    if level == 1:
        text = "<LI><font size=\"+1\"><strong>%s</strong></font>\n" % collctn_name
    elif level >= 2:
        text = "<LI>%s\n" % collctn_name
    else:
        text = ""
    ## Fetch the children attached to this collection:
    doctype_children = get_doctype_children_of_submission_collection(id_father)
    collctn_children = get_collection_children_of_submission_collection(id_father)
    has_children = len(doctype_children) > 0 or len(collctn_children) > 0
    if has_children:
        ## There is something to display, so open a list:
        text += "<UL>\n"
    ## Doctype leaves first:
    for child_doctype in doctype_children:
        text += displayDoctypeBranch(child_doctype[0], catalogues)
    ## Then the sub-collection branches:
    for child_collctn in collctn_children:
        catalogues.append(child_collctn[0])
        text += displayCatalogueBranch(child_collctn[0], level + 1, catalogues)
    if has_children:
        ## Close the list opened above:
        text += "</UL>\n"
    return text
def displayDoctypeBranch(doctype, catalogues):
    """Render a single document-type as an HTML list-item link.

       The link pops up the doctype description on hover and, on click,
       fills the page's form with the doctype ID and submits it.

       @param doctype: (string) - the ID of the document type.
       @param catalogues: (list) - unused here; kept for interface symmetry
        with displayCatalogueBranch.
       @return: (string) - the HTML list-item for this doctype.
    """
    ldocname = get_longname_of_doctype(doctype)
    if ldocname is None:
        ldocname = "Unknown Document Type"
    item = ("<LI><a href=\"\" onmouseover=\"javascript:"
            "popUpTextWindow('%(dt)s',true,event);\" onmouseout"
            "=\"javascript:popUpTextWindow('%(dt)s',false,event);\" "
            "onClick=\"document.forms[0].doctype.value='%(dt)s';"
            "document.forms[0].submit();return false;\">%(name)s</a>\n"
            % {'dt': doctype, 'name': ldocname})
    return item
def action(req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG, doctype=""):
    """Display the submission "home page" of a given document type: the
       list of its categories and of the actions (submit, modify, ...)
       that the current user is authorized to perform on it.

       @param req: (apache request object)
       @param c: (string) - defaults to CFG_SITE_NAME.
       @param ln: (string) - the interface language; defaults to
        CFG_SITE_LANG.
       @param doctype: (string) - the ID of the document type.
       @return: (string) - the rendered page, a warning page if the
        doctype is unknown, or a login redirect / "not authorized" page
        when the user may access no category and no action.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    nbCateg = 0
    snameCateg = []
    lnameCateg = []
    actionShortDesc = []
    indir = []
    actionbutton = []
    statustext = []
    t = ""
    ln = wash_language(ln)
    # get user ID:
    try:
        uid = getUid(req)
    except Error, e:
        return error_page(e, req, ln)
    #parses database to get all data
    ## first, get the list of categories
    doctype_categs = get_categories_of_doctype(doctype)
    for doctype_categ in doctype_categs:
        # doctype_categ row: [0]=short name, [1]=long name
        if not acc_authorize_action(req, 'submit', \
                                    authorized_if_no_roles=not isGuestUser(uid), \
                                    verbose=0, \
                                    doctype=doctype, \
                                    categ=doctype_categ[0])[0] == 0:
            # This category is restricted for this user, move on to the next categories.
            continue
        nbCateg = nbCateg+1
        snameCateg.append(doctype_categ[0])
        lnameCateg.append(doctype_categ[1])
    ## Now get the details of the document type:
    doctype_details = get_doctype_details(doctype)
    if doctype_details is None:
        ## Doctype doesn't exist - raise error:
        return warning_page(_("Unable to find document type: %s") % escape(str(doctype)), req, ln)
    else:
        # doctype_details row: [0]=long name, [1]=short name, [4]=description
        docFullDesc = doctype_details[0]
        # Also update the doctype as returned by the database, since
        # it might have a differnent case (eg. DemOJrN->demoJRN)
        doctype = docShortDesc = doctype_details[1]
        description = doctype_details[4]
    ## Get the details of the actions supported by this document-type:
    doctype_actions = get_actions_on_submission_page_for_doctype(doctype)
    for doctype_action in doctype_actions:
        if not acc_authorize_action(req, 'submit', \
                                    authorized_if_no_roles=not isGuestUser(uid), \
                                    doctype=doctype, \
                                    act=doctype_action[0])[0] == 0:
            # This action is not authorized for this user, move on to the next actions.
            continue
        ## Get the details of this action:
        action_details = get_action_details(doctype_action[0])
        if action_details is not None:
            # action_details row: [1]=storage dir, [4]=button, [5]=status text
            actionShortDesc.append(doctype_action[0])
            indir.append(action_details[1])
            actionbutton.append(action_details[4])
            statustext.append(action_details[5])
    if not snameCateg and not actionShortDesc:
        if isGuestUser(uid):
            # If user is guest and does not have access to any of the
            # categories, offer to login.
            return redirect_to_url(req, "%s/youraccount/login%s" % (
                CFG_SITE_SECURE_URL,
                make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : ln}, {})),
                norobot=True)
        else:
            return page_not_authorized(req, "../submit",
                                       uid=uid,
                                       text=_("You are not authorized to access this submission interface."),
                                       navmenuid='submit')
    ## Send the gathered information to the template so that the doctype's
    ## home-page can be displayed:
    t = websubmit_templates.tmpl_action_page(
          ln=ln,
          uid=uid,
          pid = os.getpid(),
          now = time.time(),
          doctype = doctype,
          description = description,
          docfulldesc = docFullDesc,
          snameCateg = snameCateg,
          lnameCateg = lnameCateg,
          actionShortDesc = actionShortDesc,
          indir = indir,
          # actionbutton = actionbutton,
          statustext = statustext,
        )
    p_navtrail = """<a href="/submit?ln=%(ln)s" class="navtrail">%(submit)s</a>""" % {'submit' : _("Submit"),
                                                                                      'ln' : ln}
    return page(title = docFullDesc,
                body=t,
                navtrail=p_navtrail,
                description="submit documents",
                keywords="submit",
                uid=uid,
                language=ln,
                req=req,
                navmenuid='submit')
def Request_Print(m, txt):
    """Legacy display helper exposed to WebSubmit functions.

    The display mode *m* is accepted for backward compatibility but is
    ignored: the text is returned unchanged.

    @param m: the display mode (unused).
    @param txt: (string) - the text to be displayed.
    @return: (string) - txt, unmodified.
    """
    return txt
def Evaluate_Parameter (field, doctype):
    """Return the value of parameter *field* for document type *doctype*.

    Looks the value up in the database; when no doctype-specific value
    exists, falls back to the default document type ("DEF"), and finally
    to the empty string.

    @param field: (string) - the name of the parameter.
    @param doctype: (string) - the ID of the document type.
    @return: (string) - the parameter's value, or "" when unknown.
    """
    value = get_parameter_value_for_doctype(doctype, field)
    if value is None:
        ## No doctype-specific value: fall back to the default doctype.
        value = get_parameter_value_for_doctype("DEF", field)
    ## Normalise "no value anywhere" to the empty string.
    if value is None:
        return ""
    return value
def Get_Parameters (function, doctype):
    """Build the parameter dictionary of a function for a document type.

    @param function: (string) - the name of the function for which the
        parameters are to be retrieved.
    @param doctype: (string) - the ID of the document type.
    @return: (dictionary) - parameter name -> parameter value.
    """
    ## Resolve each parameter declared for the function to its value
    ## for this doctype (Evaluate_Parameter handles the "DEF" fallback).
    return dict((row[0], Evaluate_Parameter(row[0], doctype))
                for row in get_parameters_of_function(function))
def get_level(doctype, action):
    """Get the level of a given submission. If unknown, return 0.

    @param doctype: (string) - the ID of the document type.
    @param action: (string) - the ID of the action.
    @return: the level of the submission (as stored, i.e. a
        numeric-looking string), or the integer 0 when unknown.
    """
    details = get_details_of_submission(doctype, action)
    if details is None:
        return 0
    level = details[9]
    try:
        ## Validate that the stored level is numeric.  NOTE: on success
        ## the stored value is returned as-is (not converted to int),
        ## matching historical behaviour.
        int(level)
    except ValueError:
        return 0
    return level
def action_details (doctype, action):
    """Return the score of a mandatory action, or -1 otherwise.

    @param doctype: (string) - the ID of the document type.
    @param action: (string) - the ID of the action.
    @return: the score of the action when it is mandatory (level != "0");
        -1 when the action is optional or the submission is unknown.
    """
    details = get_details_of_submission(doctype, action)
    if details is not None and details[9] != "0":
        ## Mandatory action: report its score.
        return details[10]
    ## Optional (level "0") or unknown submission.
    return -1
def print_function_calls(req, doctype, action, step, form, start_time,
                         access, curdir, dismode, rn, last_step, action_score,
                         ln=CFG_SITE_LANG):
    """Call the functions required by an 'action' on a 'doctype' document.

    In supervisor mode (dismode == 'S') a table of the function calls is
    produced.  Each function is executed inside a restricted global
    environment ("jail") shared by all functions of the step.

    @param req: the request object.
    @param doctype: (string) - the ID of the document type.
    @param action: (string) - the ID of the action.
    @param step: the submission step whose functions are to be run.
    @param form: the submitted form.
    @param start_time: (float) - reference time used for logging.
    @param access: (string) - the access number of the submission.
    @param curdir: (string) - the submission working directory.
    @param dismode: (string) - display mode ('S' = supervisor).
    @param rn: (string) - the report number of the document.
    @param last_step: the last step of the action.
    @param action_score: the score of the action.
    @param ln: (string) - the interface language.
    @return: (function_output_string, last_step, action_score, rn)
    """
    user_info = collect_user_info(req)
    # load the right message language
    _ = gettext_set_language(ln)
    t = ""
    ## Here follows the global protect environment: the dictionary of
    ## names visible to each function run via execfile/eval below.
    the_globals = {
        'doctype' : doctype,
        'action' : action,
        'act' : action, ## for backward compatibility
        'step' : step,
        'access' : access,
        'ln' : ln,
        'curdir' : curdir,
        'uid' : user_info['uid'],
        'uid_email' : user_info['email'],
        'rn' : rn,
        'last_step' : last_step,
        'action_score' : action_score,
        '__websubmit_in_jail__' : True,
        'form' : form,
        'user_info' : user_info,
        '__builtins__' : globals()['__builtins__'],
        'Request_Print': Request_Print
    }
    ## Get the list of functions to be called
    funcs_to_call = get_functions_for_submission_step(doctype, action, step)
    ## If no functions are found at this step for this doctype,
    ## get the functions for the DEF(ault) doctype:
    if len(funcs_to_call) == 0:
        funcs_to_call = get_functions_for_submission_step("DEF", action, step)
    if len(funcs_to_call) > 0:
        # while there are functions left...
        functions = []
        for function in funcs_to_call:
            try:
                function_name = function[0]
                function_score = function[1]
                currfunction = {
                    'name' : function_name,
                    'score' : function_score,
                    'error' : 0,
                    'text' : '',
                }
                ## NOTE(review): this import rebinds the local name
                ## "functions", shadowing the result list initialised
                ## above -- looks like a defect introduced during the
                ## legacy module move; confirm against callers.
                from invenio.legacy.websubmit import functions
                ## NOTE(review): "function" here is the DB row tuple of
                ## the loop, so function.__path__ looks wrong (the
                ## imported module's __path__ was presumably intended);
                ## confirm.
                function_path = os.path.join(function.__path__,
                                             function_name + '.py')
                if os.path.exists(function_path):
                    # import the function itself
                    #function = getattr(invenio.legacy.websubmit.functions, function_name)
                    execfile(function_path, the_globals)
                    if function_name not in the_globals:
                        currfunction['error'] = 1
                    else:
                        the_globals['function'] = the_globals[function_name]
                        # Evaluate the parameters, and place them in an array
                        the_globals['parameters'] = Get_Parameters(function_name, doctype)
                        # Call function:
                        log_function(curdir, "Start %s" % function_name, start_time)
                        try:
                            try:
                                ## Attempt to call the function with 4 arguments:
                                ## ("parameters", "curdir" and "form" as usual),
                                ## and "user_info" - the dictionary of user
                                ## information:
                                ##
                                ## Note: The function should always be called with
                                ## these keyword arguments because the "TypeError"
                                ## except clause checks for a specific mention of
                                ## the 'user_info' keyword argument when a legacy
                                ## function (one that accepts only 'parameters',
                                ## 'curdir' and 'form') has been called and if
                                ## the error string doesn't contain this,
                                ## the TypeError will be considered as a something
                                ## that was incorrectly handled in the function and
                                ## will be propagated as an
                                ## InvenioWebSubmitFunctionError instead of the
                                ## function being called again with the legacy 3
                                ## arguments.
                                func_returnval = eval("function(parameters=parameters, curdir=curdir, form=form, user_info=user_info)", the_globals)
                            except TypeError, err:
                                ## If the error contains the string "got an
                                ## unexpected keyword argument", it means that the
                                ## function doesn't accept the "user_info"
                                ## argument. Test for this:
                                if "got an unexpected keyword argument 'user_info'" in \
                                       str(err).lower():
                                    ## As expected, the function doesn't accept
                                    ## the user_info keyword argument. Call it
                                    ## again with the legacy 3 arguments
                                    ## (parameters, curdir, form):
                                    func_returnval = eval("function(parameters=parameters, curdir=curdir, form=form)", the_globals)
                                else:
                                    ## An unexpected "TypeError" was caught.
                                    ## It looks as though the function itself didn't
                                    ## handle something correctly.
                                    ## Convert this error into an
                                    ## InvenioWebSubmitFunctionError and raise it:
                                    msg = "Unhandled TypeError caught when " \
                                          "calling [%s] WebSubmit function: " \
                                          "[%s]" % (function_name, str(err))
                                    raise InvenioWebSubmitFunctionError(msg)
                        except InvenioWebSubmitFunctionWarning, err:
                            ## There was an unexpected behaviour during the
                            ## execution. Log the message into function's log
                            ## and go to next function
                            log_function(curdir, "***Warning*** from %s: %s" \
                                         % (function_name, str(err)), start_time)
                            ## Reset "func_returnval" to None:
                            func_returnval = None
                            register_exception(req=req, alert_admin=True, prefix="Warning in executing function %s with globals %s" % (pprint.pformat(currfunction), pprint.pformat(the_globals)))
                        log_function(curdir, "End %s" % function_name, start_time)
                        if func_returnval is not None:
                            ## Append the returned value as a string:
                            currfunction['text'] = str(func_returnval)
                        else:
                            ## The function returned NoneType. Don't keep that
                            ## value as the currfunction->text. Replace it with
                            ## the empty string.
                            currfunction['text'] = ""
                else:
                    ## The function's source file does not exist on disk.
                    currfunction['error'] = 1
                functions.append(currfunction)
            except InvenioWebSubmitFunctionStop, err:
                ## The submission asked to stop execution. This is
                ## ok. Do not alert admin, and raise exception further
                log_function(curdir, "***Stop*** from %s: %s" \
                             % (function_name, str(err)), start_time)
                raise
            except:
                ## Any other failure is unexpected: record it for the
                ## admin and propagate.
                register_exception(req=req, alert_admin=True, prefix="Error in executing function %s with globals %s" % (pprint.pformat(currfunction), pprint.pformat(the_globals)))
                raise
        t = websubmit_templates.tmpl_function_output(
              ln = ln,
              display_on = (dismode == 'S'),
              action = action,
              doctype = doctype,
              step = step,
              functions = functions,
            )
    else :
        if dismode == 'S':
            t = "<br /><br /><b>" + _("The chosen action is not supported by the document type.") + "</b>"
    ## Return the collected output together with the (possibly updated)
    ## jail values for last_step, action_score and rn.
    return (t, the_globals['last_step'], the_globals['action_score'], the_globals['rn'])
def Propose_Next_Action (doctype, action_score, access, currentlevel, indir, ln=CFG_SITE_LANG):
    """Render the list of submissions that may follow the current one.

    Only the candidate submissions sharing the best (first) score are
    proposed.

    @param doctype: (string) - the ID of the document type.
    @param action_score: the score of the action just completed.
    @param access: (string) - the access number of the submission.
    @param currentlevel: the level of the current submission.
    @param indir: (string) - the submission directory.
    @param ln: (string) - the interface language.
    @return: (string) - HTML for the next-action proposal ("" if none).
    """
    output = ""
    candidates = \
     get_submissions_at_level_X_with_score_above_N(doctype, currentlevel, action_score)
    if len(candidates) > 0:
        top_score = candidates[0][10]
        proposals = []
        for submission in candidates:
            ## Keep only the submissions tied with the best score.
            if submission[10] != top_score:
                continue
            ## Get the submission directory of this action:
            nextdir = get_storage_directory_of_action(submission[1])
            if nextdir is None:
                nextdir = ""
            proposals.append({
                'page' : submission[11],
                'action' : submission[1],
                'doctype' : doctype,
                'nextdir' : nextdir,
                'access' : access,
                'indir' : indir,
                'name' : submission[12],
            })
        output = websubmit_templates.tmpl_next_action(
                   ln = ln,
                   actions = proposals,
                 )
    return output
def specialchars(text):
    """Replace Windows-1252 numeric character references by plain ASCII.

    @param text: (string) - the text to clean.
    @return: (string) - the text with curly quotes, the curly apostrophe,
        the em-dash and the ellipsis replaced by ", ', - and ...
    """
    ## Use the str.replace() method instead of the deprecated
    ## string.replace() module function (removed in Python 3);
    ## behaviour is identical.
    replacements = (
        ("&#147;", "\042"),          # left double quote  -> "
        ("&#148;", "\042"),          # right double quote -> "
        ("&#146;", "\047"),          # right single quote -> '
        ("&#151;", "\055"),          # em dash            -> -
        ("&#133;", "\056\056\056"),  # ellipsis           -> ...
    )
    for entity, ascii_equiv in replacements:
        text = text.replace(entity, ascii_equiv)
    return text
def log_function(curdir, message, start_time, filename="function_log"):
    """Append *message* and the time elapsed since *start_time* to a log
    file inside *curdir*.

    Logging is silently skipped when the directory does not exist or is
    not writable.

    @param curdir: (string) path to the destination dir
    @param message: (string) message to write into the file
    @param start_time: (float) reference time (as returned by
        time.time()) used to compute the elapsed time
    @param filename: (string) name of log file
    """
    time_lap = "%.3f" % (time.time() - start_time)
    if os.access(curdir, os.F_OK | os.W_OK):
        ## The with-statement guarantees the handle is closed even when
        ## the write fails (the original leaked it in that case).
        with open("%s/%s" % (curdir, filename), "a+") as fd:
            fd.write("""%s --- %s\n""" % (message, time_lap))
diff --git a/invenio/legacy/websubmit/file_converter.py b/invenio/legacy/websubmit/file_converter.py
index 2efd772b4..ce746f7b7 100644
--- a/invenio/legacy/websubmit/file_converter.py
+++ b/invenio/legacy/websubmit/file_converter.py
@@ -1,1465 +1,1465 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This module implement fulltext conversion between many different file formats.
"""
import os
import stat
import re
import sys
import shutil
import tempfile
import HTMLParser
import time
import subprocess
import atexit
import signal
import threading
from logging import DEBUG, getLogger
from htmlentitydefs import entitydefs
from optparse import OptionParser
try:
from invenio.hocrlib import create_pdf, extract_hocr, CFG_PPM_RESOLUTION
try:
from PyPDF2 import PdfFileReader, PdfFileWriter
except ImportError:
from pyPdf import PdfFileReader, PdfFileWriter
CFG_CAN_DO_OCR = True
except ImportError:
CFG_CAN_DO_OCR = False
from invenio.utils.text import wrap_text_in_a_box
from invenio.shellutils import run_process_with_timeout, run_shell_command
from invenio.config import CFG_TMPDIR, CFG_ETCDIR, CFG_PYLIBDIR, \
CFG_PATH_ANY2DJVU, \
CFG_PATH_PDFINFO, \
CFG_PATH_GS, \
CFG_PATH_PDFOPT, \
CFG_PATH_PDFTOPS, \
CFG_PATH_GZIP, \
CFG_PATH_GUNZIP, \
CFG_PATH_PDFTOTEXT, \
CFG_PATH_PDFTOPPM, \
CFG_PATH_OCROSCRIPT, \
CFG_PATH_DJVUPS, \
CFG_PATH_DJVUTXT, \
CFG_PATH_OPENOFFICE_PYTHON, \
CFG_PATH_PSTOTEXT, \
CFG_PATH_TIFF2PDF, \
CFG_PATH_PS2PDF, \
CFG_OPENOFFICE_SERVER_HOST, \
CFG_OPENOFFICE_SERVER_PORT, \
CFG_OPENOFFICE_USER, \
CFG_PATH_CONVERT, \
CFG_PATH_PAMFILE, \
CFG_BINDIR, \
CFG_LOGDIR, \
CFG_BIBSCHED_PROCESS_USER, \
CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM, \
CFG_BIBDOCFILE_DESIRED_CONVERSIONS
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
def get_file_converter_logger():
    """Return the logger shared by all file-converter routines."""
    logger = getLogger("InvenioWebSubmitFileConverterLogger")
    return logger
## Mapping from ISO 639-1 two-letter language codes to the three-letter
## codes expected by the OCR tooling.
CFG_TWO2THREE_LANG_CODES = {
    'en': 'eng',
    'nl': 'nld',
    'es': 'spa',
    'de': 'deu',
    'it': 'ita',
    'fr': 'fra',
}
## Scratch directory used when exchanging files with Open/LibreOffice.
CFG_OPENOFFICE_TMPDIR = os.path.join(CFG_TMPDIR, 'ooffice-tmp-files')
## Minimal Ghostscript versions required by the PDF/A and PDF/X
## conversion pipelines (compared component-wise in can_pdfa/can_pdfx).
CFG_GS_MINIMAL_VERSION_FOR_PDFA = "8.65"
CFG_GS_MINIMAL_VERSION_FOR_PDFX = "8.52"
## ICC colour profile and Ghostscript definition files used when
## producing PDF/A and PDF/X output (installed by
## "make install-pdfa-helper-files").
CFG_ICC_PATH = os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'ISOCoatedsb.icc')
CFG_PDFA_DEF_PATH = os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'PDFA_def.ps')
CFG_PDFX_DEF_PATH = os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'PDFX_def.ps')
## Log file collecting the output of the unoconv helper processes.
CFG_UNOCONV_LOG_PATH = os.path.join(CFG_LOGDIR, 'unoconv.log')
## Collapses any run of whitespace into a single character.
_RE_CLEAN_SPACES = re.compile(r'\s+')
class InvenioWebSubmitFileConverterError(Exception):
    """Raised when a file format conversion fails or is not supported."""
    pass
def get_conversion_map():
    """Return the supported conversion graph as a nested dictionary:

        '.pdf' : {'.ps.gz' : ('pdf2ps', {param1 : value1...})

    i.e. input extension -> output extension -> (converter, params).
    Which edges exist depends on which external tools are configured
    (gzip, djvu tools, Ghostscript, pdftotext, Open/LibreOffice, ...).

    @return: (dictionary) - the conversion map.
    """
    ret = {
        '.csv': {},
        '.djvu': {},
        '.doc': {},
        '.docx': {},
        '.sxw': {},
        '.htm': {},
        '.html': {},
        '.odp': {},
        '.ods': {},
        '.odt': {},
        '.pdf': {},
        '.ppt': {},
        '.pptx': {},
        '.sxi': {},
        '.ps': {},
        '.ps.gz': {},
        '.rtf': {},
        '.tif': {},
        '.tiff': {},
        '.txt': {},
        '.xls': {},
        '.xlsx': {},
        '.sxc': {},
        '.xml': {},
        '.hocr': {},
        '.pdf;pdfa': {},
        '.asc': {},
    }
    ## (De)compression of PostScript.
    if CFG_PATH_GZIP:
        ret['.ps']['.ps.gz'] = (gzip, {})
    if CFG_PATH_GUNZIP:
        ret['.ps.gz']['.ps'] = (gunzip, {})
    ## DJVU conversions.
    if CFG_PATH_ANY2DJVU:
        ret['.pdf']['.djvu'] = (any2djvu, {})
        ret['.ps']['.djvu'] = (any2djvu, {})
    if CFG_PATH_DJVUPS:
        ret['.djvu']['.ps'] = (djvu2ps, {'compress': False})
        if CFG_PATH_GZIP:
            ret['.djvu']['.ps.gz'] = (djvu2ps, {'compress': True})
    if CFG_PATH_DJVUTXT:
        ret['.djvu']['.txt'] = (djvu2text, {})
    ## Text extraction from PostScript.
    if CFG_PATH_PSTOTEXT:
        ret['.ps']['.txt'] = (pstotext, {})
        if CFG_PATH_GUNZIP:
            ret['.ps.gz']['.txt'] = (pstotext, {})
    ## PDF/A production (fall back to plain ps2pdf when unavailable).
    if can_pdfa():
        ret['.ps']['.pdf;pdfa'] = (ps2pdfa, {})
        ret['.pdf']['.pdf;pdfa'] = (pdf2pdfa, {})
        if CFG_PATH_GUNZIP:
            ret['.ps.gz']['.pdf;pdfa'] = (ps2pdfa, {})
    else:
        if CFG_PATH_PS2PDF:
            ret['.ps']['.pdf;pdfa'] = (ps2pdf, {})
            if CFG_PATH_GUNZIP:
                ret['.ps.gz']['.pdf'] = (ps2pdf, {})
    ## PDF/X production.
    if can_pdfx():
        ret['.ps']['.pdf;pdfx'] = (ps2pdfx, {})
        ret['.pdf']['.pdf;pdfx'] = (pdf2pdfx, {})
        if CFG_PATH_GUNZIP:
            ret['.ps.gz']['.pdf;pdfx'] = (ps2pdfx, {})
    ## PDF -> PostScript.
    if CFG_PATH_PDFTOPS:
        ret['.pdf']['.ps'] = (pdf2ps, {'compress': False})
        ret['.pdf;pdfa']['.ps'] = (pdf2ps, {'compress': False})
        if CFG_PATH_GZIP:
            ret['.pdf']['.ps.gz'] = (pdf2ps, {'compress': True})
            ret['.pdf;pdfa']['.ps.gz'] = (pdf2ps, {'compress': True})
    ## Text extraction from PDF, plain text and (X)HTML.
    if CFG_PATH_PDFTOTEXT:
        ret['.pdf']['.txt'] = (pdf2text, {})
        ret['.pdf;pdfa']['.txt'] = (pdf2text, {})
    ret['.asc']['.txt'] = (txt2text, {})
    ret['.txt']['.txt'] = (txt2text, {})
    ret['.csv']['.txt'] = (txt2text, {})
    ret['.html']['.txt'] = (html2text, {})
    ret['.htm']['.txt'] = (html2text, {})
    ret['.xml']['.txt'] = (html2text, {})
    ## TIFF -> PDF.
    if CFG_PATH_TIFF2PDF:
        ret['.tiff']['.pdf'] = (tiff2pdf, {})
        ret['.tif']['.pdf'] = (tiff2pdf, {})
    ## Office formats, via a running Open/LibreOffice server.
    if CFG_PATH_OPENOFFICE_PYTHON and CFG_OPENOFFICE_SERVER_HOST:
        ret['.rtf']['.odt'] = (unoconv, {'output_format': 'odt'})
        ret['.rtf']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.rtf']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.rtf']['.docx'] = (unoconv, {'output_format': 'docx'})
        ret['.doc']['.odt'] = (unoconv, {'output_format': 'odt'})
        ret['.doc']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.doc']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.doc']['.docx'] = (unoconv, {'output_format': 'docx'})
        ret['.docx']['.odt'] = (unoconv, {'output_format': 'odt'})
        ret['.docx']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.docx']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.sxw']['.odt'] = (unoconv, {'output_format': 'odt'})
        ret['.sxw']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.sxw']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.docx']['.docx'] = (unoconv, {'output_format': 'docx'})
        ret['.odt']['.doc'] = (unoconv, {'output_format': 'doc'})
        ret['.odt']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.odt']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.odt']['.docx'] = (unoconv, {'output_format': 'docx'})
        ret['.ppt']['.odp'] = (unoconv, {'output_format': 'odp'})
        ret['.ppt']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.ppt']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.ppt']['.pptx'] = (unoconv, {'output_format': 'pptx'})
        ret['.pptx']['.odp'] = (unoconv, {'output_format': 'odp'})
        ret['.pptx']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.pptx']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.sxi']['.odp'] = (unoconv, {'output_format': 'odp'})
        ret['.sxi']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.sxi']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.sxi']['.pptx'] = (unoconv, {'output_format': 'pptx'})
        ret['.odp']['.ppt'] = (unoconv, {'output_format': 'ppt'})
        ret['.odp']['.pptx'] = (unoconv, {'output_format': 'pptx'})
        ret['.odp']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.odp']['.txt'] = (unoconv, {'output_format': 'txt'})
        ret['.odp']['.pptx'] = (unoconv, {'output_format': 'pptx'})
        ret['.xls']['.ods'] = (unoconv, {'output_format': 'ods'})
        ## BUGFIX: the output format name is 'xlsx', not 'xslx' (the
        ## original typo made these conversions always fail in unoconv).
        ret['.xls']['.xlsx'] = (unoconv, {'output_format': 'xlsx'})
        ret['.xlsx']['.ods'] = (unoconv, {'output_format': 'ods'})
        ret['.sxc']['.ods'] = (unoconv, {'output_format': 'ods'})
        ret['.sxc']['.xlsx'] = (unoconv, {'output_format': 'xlsx'})
        ret['.ods']['.xls'] = (unoconv, {'output_format': 'xls'})
        ret['.ods']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'})
        ret['.ods']['.csv'] = (unoconv, {'output_format': 'csv'})
        ret['.ods']['.xlsx'] = (unoconv, {'output_format': 'xlsx'})
        ret['.csv']['.txt'] = (txt2text, {})
    ## Let's add all the existing output formats as potential input formats.
    for value in ret.values():
        for key in value.keys():
            if key not in ret:
                ret[key] = {}
    return ret
def get_best_format_to_extract_text_from(filelist, best_formats=CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM):
    """
    Return among the filelist the best file whose format is best suited for
    extracting text.

    @raise InvenioWebSubmitFileConverterError: when none of the files can
        yield text.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    ## Keep, in preference order, only the formats we can turn into text.
    usable_formats = [normalize_format(fmt) for fmt in best_formats
                      if can_convert(fmt, '.txt')]
    for fmt in usable_formats:
        for candidate in filelist:
            if decompose_file(candidate, skip_version=True)[2].endswith(fmt):
                return candidate
    raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.")
def get_missing_formats(filelist, desired_conversion=None):
    """Given a list of files it will return a dictionary of the form:
    file1 : missing formats to generate from it...

    @param filelist: (list) - paths of the available files.
    @param desired_conversion: (dict) - source format -> list of target
        formats (defaults to CFG_BIBDOCFILE_DESIRED_CONVERSIONS).
    @return: (dict) - filename -> list of formats still to be generated
        from that file.
    """
    from invenio.bibdocfile import normalize_format, decompose_file
    if desired_conversion is None:
        desired_conversion = CFG_BIBDOCFILE_DESIRED_CONVERSIONS
    ## Normalise both sides of the desired-conversion mapping.
    normalized = {}
    for source_fmt, target_fmts in desired_conversion.iteritems():
        normalized[normalize_format(source_fmt)] = [normalize_format(fmt) for fmt in target_fmts]
    desired_conversion = normalized
    available_formats = [decompose_file(filename, skip_version=True)[2] for filename in filelist]
    already_scheduled = []
    ret = {}
    for filename in filelist:
        current_fmt = decompose_file(filename, skip_version=True)[2]
        for target_fmt in desired_conversion.get(current_fmt, []):
            ## Each missing format is scheduled only once, on the first
            ## file able to produce it.
            if target_fmt not in available_formats and target_fmt not in already_scheduled:
                already_scheduled.append(target_fmt)
                ret.setdefault(filename, []).append(target_fmt)
    return ret
def can_convert(input_format, output_format, max_intermediate_conversions=4):
    """Return the chain of conversion to transform input_format into
    output_format, if any.

    @param input_format: (string) - the source format.
    @param output_format: (string) - the target format.
    @param max_intermediate_conversions: (int) - recursion budget limiting
        the chain length.
    @return: (list) - list of (converter, params) tuples to apply in
        order, or [] when no chain exists.
    """
    from invenio.bibdocfile import normalize_format
    if max_intermediate_conversions <= 0:
        ## Recursion budget exhausted.
        return []
    input_format = normalize_format(input_format)
    output_format = normalize_format(output_format)
    if input_format in __CONVERSION_MAP:
        if output_format in __CONVERSION_MAP[input_format]:
            ## Direct conversion available.
            return [__CONVERSION_MAP[input_format][output_format]]
        best_res = []
        best_intermediate = ''
        for intermediate_format in __CONVERSION_MAP[input_format]:
            res = can_convert(intermediate_format, output_format, max_intermediate_conversions - 1)
            ## BUGFIX: compare chain lengths.  The original tested
            ## "len(res) < best_res" (int vs list), which in Python 2 is
            ## always True, so the shortest chain was never actually
            ## selected (and it raises TypeError in Python 3).
            if res and (not best_res or len(res) < len(best_res)):
                best_res = res
                best_intermediate = intermediate_format
        if best_res:
            return [__CONVERSION_MAP[input_format][best_intermediate]] + best_res
    return []
def can_pdfopt(verbose=False):
    """Return True if it's possible to optimize PDFs."""
    if CFG_PATH_PDFOPT:
        return True
    if verbose:
        sys.stderr.write("PDF linearization is not supported because the pdfopt executable is not available\n")
    return False
def can_pdfx(verbose=False):
    """Return True if it's possible to generate PDF/Xs."""
    if not CFG_PATH_PDFTOPS:
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/X is not possible because the pdftops executable is not available\n")
        return False
    if not CFG_PATH_GS:
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/X is not possible because the gs executable is not available\n")
        return False
    ## Check that the installed Ghostscript is recent enough.
    try:
        gs_version = run_shell_command("%s --version" % CFG_PATH_GS)[1].strip()
        if not gs_version:
            raise ValueError("No version information returned")
        found = [int(number) for number in gs_version.split('.')]
        required = [int(number) for number in CFG_GS_MINIMAL_VERSION_FOR_PDFX.split('.')]
        if found < required:
            ## NOTE: this message is printed regardless of *verbose*,
            ## matching the historical behaviour.
            sys.stderr.write("Conversion of PS or PDF to PDF/X is not possible because the minimal gs version for the executable %s is not met: it should be %s but %s has been found\n" % (CFG_PATH_GS, CFG_GS_MINIMAL_VERSION_FOR_PDFX, gs_version))
            return False
    except Exception as err:
        sys.stderr.write("Conversion of PS or PDF to PDF/X is not possible because it's not possible to retrieve the gs version using the executable %s: %s\n" % (CFG_PATH_GS, err))
        return False
    if not CFG_PATH_PDFINFO:
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/X is not possible because the pdfinfo executable is not available\n")
        return False
    if not os.path.exists(CFG_ICC_PATH):
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/X is not possible because %s does not exists. Have you run make install-pdfa-helper-files?\n" % CFG_ICC_PATH)
        return False
    return True
def can_pdfa(verbose=False):
    """Return True if it's possible to generate PDF/As."""
    if not CFG_PATH_PDFTOPS:
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/A is not possible because the pdftops executable is not available\n")
        return False
    if not CFG_PATH_GS:
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/A is not possible because the gs executable is not available\n")
        return False
    ## Check that the installed Ghostscript is recent enough.
    try:
        gs_version = run_shell_command("%s --version" % CFG_PATH_GS)[1].strip()
        if not gs_version:
            raise ValueError("No version information returned")
        found = [int(number) for number in gs_version.split('.')]
        required = [int(number) for number in CFG_GS_MINIMAL_VERSION_FOR_PDFA.split('.')]
        if found < required:
            ## NOTE: this message is printed regardless of *verbose*,
            ## matching the historical behaviour.
            sys.stderr.write("Conversion of PS or PDF to PDF/A is not possible because the minimal gs version for the executable %s is not met: it should be %s but %s has been found\n" % (CFG_PATH_GS, CFG_GS_MINIMAL_VERSION_FOR_PDFA, gs_version))
            return False
    except Exception as err:
        sys.stderr.write("Conversion of PS or PDF to PDF/A is not possible because it's not possible to retrieve the gs version using the executable %s: %s\n" % (CFG_PATH_GS, err))
        return False
    if not CFG_PATH_PDFINFO:
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/A is not possible because the pdfinfo executable is not available\n")
        return False
    if not os.path.exists(CFG_ICC_PATH):
        if verbose:
            sys.stderr.write("Conversion of PS or PDF to PDF/A is not possible because %s does not exists. Have you run make install-pdfa-helper-files?\n" % CFG_ICC_PATH)
        return False
    return True
def can_perform_ocr(verbose=False):
    """Return True if it's possible to perform OCR."""
    ## Each prerequisite is checked in turn; the first missing one
    ## determines the (optional) diagnostic message.
    prerequisites = (
        (CFG_CAN_DO_OCR, "OCR is not supported because either the pyPdf of ReportLab Python libraries are missing"),
        (CFG_PATH_OCROSCRIPT, "OCR is not supported because the ocroscript executable is not available"),
        (CFG_PATH_PDFTOPPM, "OCR is not supported because the pdftoppm executable is not available"),
    )
    for available, message in prerequisites:
        if not available:
            if verbose:
                sys.stderr.write(message + "\n")
            return False
    return True
def guess_ocropus_produced_garbage(input_file, hocr_p):
    """Return True if the output produced by OCROpus contains only
    garbage instead of text.

    Heuristic: count ASCII letters versus all other characters in the
    recognised words; when the non-letters outnumber the letters the
    output is considered Garbage (tm).

    @param input_file: (string) - path to the OCR output to inspect.
    @param hocr_p: (bool) - True when input_file is in hOCR format,
        False when it is plain text.
    @return: (bool) - True when the output looks like garbage.
    """
    def _get_words_from_text():
        ## Plain-text output: one word per space-separated token.
        ret = []
        ## Context manager closes the handle (the original leaked it).
        with open(input_file) as stream:
            for row in stream:
                for word in row.strip().split(' '):
                    ret.append(word.strip())
        return ret

    def _get_words_from_hocr():
        ## hOCR output: let extract_hocr() parse the page structure.
        ret = []
        with open(input_file) as stream:
            hocr = extract_hocr(stream.read())
        for dummy, dummy, lines in hocr:
            for dummy, line in lines:
                for word in line.split():
                    ret.append(word.strip())
        return ret

    if hocr_p:
        words = _get_words_from_hocr()
    else:
        words = _get_words_from_text()
    goods = 0
    bads = 0
    for word in words:
        for char in word.decode('utf-8'):
            if (u'a' <= char <= u'z') or (u'A' <= char <= u'Z'):
                goods += 1
            else:
                bads += 1
    if bads > goods:
        get_file_converter_logger().debug('OCROpus produced garbage')
        return True
    else:
        return False
def guess_is_OCR_needed(input_file, ln='en'):
    """Decide whether OCR should be run on input_file.

    Return True if OCR is needed, False if it's already possible to
    retrieve information from the document.
    """
    ## FIXME: a way to understand if pdftotext has returned garbage
    ## shuould be found. E.g. 1.0*len(text)/len(zlib.compress(text)) < 2.1
    ## could be a good hint for garbage being found.
    ## Until such a heuristic exists, always request OCR.
    return True
def convert_file(input_file, output_file=None, output_format=None, **params):
    """
    Convert files from one format to another.
    @param input_file [string] the path to an existing file
    @param output_file [string] the path to the desired output. (if None a
        temporary file is generated)
    @param output_format [string] the desired format (if None it is taken from
        output_file)
    @param params other parameters to pass to the particular converter
    @return [string] the final output_file
    @raise InvenioWebSubmitFileConverterError: when no conversion chain
        exists or a converter in the chain fails.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    ## Work out the target extension either from the explicit format or
    ## from the requested output file name.
    if output_format is None:
        if output_file is None:
            raise ValueError("At least output_file or format should be specified.")
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    else:
        output_ext = normalize_format(output_format)
    input_ext = decompose_file(input_file, skip_version=True)[2]
    conversion_chain = can_convert(input_ext, output_ext)
    if conversion_chain:
        get_file_converter_logger().debug("Conversion chain from %s to %s: %s" % (input_ext, output_ext, conversion_chain))
        current_input = input_file
        ## Apply each converter of the chain in turn; only the last step
        ## writes to the caller-requested output_file (intermediate steps
        ## let the converter pick a temporary destination).
        for i, (converter, final_params) in enumerate(conversion_chain):
            current_output = None
            if i == (len(conversion_chain) - 1):
                current_output = output_file
            ## Copy the per-edge defaults so the caller's params can
            ## override them without mutating the conversion map.
            final_params = dict(final_params)
            final_params.update(params)
            try:
                get_file_converter_logger().debug("Converting from %s to %s using %s with params %s" % (current_input, current_output, converter, final_params))
                current_output = converter(current_input, current_output, **final_params)
                get_file_converter_logger().debug("... current_output %s" % (current_output, ))
            except InvenioWebSubmitFileConverterError, err:
                raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err))
            except Exception, err:
                register_exception(alert_admin=True)
                raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err))
            ## Intermediate results are deleted; the caller's original
            ## input file is never removed.
            if current_input != input_file:
                os.remove(current_input)
            current_input = current_output
        return current_output
    else:
        raise InvenioWebSubmitFileConverterError("It's impossible to convert from %s to %s" % (input_ext, output_ext))
## Module-level handle on the locally spawned unoconv listener process
## (None when no daemon was started).  The try/except keeps an already
## running daemon alive if this module's top level is executed again.
try:
    _UNOCONV_DAEMON
except NameError:
    _UNOCONV_DAEMON = None

## Serialises daemon start/stop across threads.
_UNOCONV_DAEMON_LOCK = threading.Lock()
def _register_unoconv():
    """Start (at most once) a local unoconv listener daemon running as
    CFG_OPENOFFICE_USER.

    No-op when the OpenOffice server is remote.  The daemon's output is
    appended to CFG_UNOCONV_LOG_PATH.
    """
    global _UNOCONV_DAEMON
    if CFG_OPENOFFICE_SERVER_HOST != 'localhost':
        ## The server runs remotely: nothing to spawn locally.
        return
    ## The with-statement releases the lock even if Popen raises
    ## (equivalent to, but terser than, acquire/try/finally/release).
    with _UNOCONV_DAEMON_LOCK:
        if not _UNOCONV_DAEMON:
            output_log = open(CFG_UNOCONV_LOG_PATH, 'a')
            _UNOCONV_DAEMON = subprocess.Popen(['sudo', '-S', '-u', CFG_OPENOFFICE_USER, os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '-l'], stdin=open('/dev/null', 'r'), stdout=output_log, stderr=output_log)
            ## Give the listener a moment to come up before first use.
            time.sleep(3)
def _unregister_unoconv():
    """Shut down the local unoconv listener daemon, if one was started.

    Asks the listener to stop ('-k'), then escalates to SIGTERM and
    SIGKILL.  No-op when the OpenOffice server is remote.
    """
    global _UNOCONV_DAEMON
    if CFG_OPENOFFICE_SERVER_HOST != 'localhost':
        return
    ## with-statement guarantees the lock is released on any exit path.
    with _UNOCONV_DAEMON_LOCK:
        if _UNOCONV_DAEMON:
            output_log = open(CFG_UNOCONV_LOG_PATH, 'a')
            ## Politely ask the listener to terminate.
            subprocess.call(['sudo', '-S', '-u', CFG_OPENOFFICE_USER, os.path.join(CFG_BINDIR, 'inveniounoconv'), '-k', '-vvv'], stdin=open('/dev/null', 'r'), stdout=output_log, stderr=output_log)
            time.sleep(1)
            ## NOTE(review): poll() is truthy only for a process that
            ## already exited with a non-zero status; "poll() is None"
            ## (still running) may have been intended -- behaviour kept
            ## as in the original, confirm before changing.
            if _UNOCONV_DAEMON.poll():
                try:
                    os.kill(_UNOCONV_DAEMON.pid, signal.SIGTERM)
                except OSError:
                    pass
            if _UNOCONV_DAEMON.poll():
                try:
                    os.kill(_UNOCONV_DAEMON.pid, signal.SIGKILL)
                except OSError:
                    pass
## NOTE: in case we switch back keeping LibreOffice running, uncomment
## the following line.
#atexit.register(_unregister_unoconv)
def unoconv(input_file, output_file=None, output_format='txt', pdfopt=True, **dummy):
    """Use unoconv to convert among OpenOffice understood documents.

    The input file is copied to a world-readable temporary file so the
    Open/LibreOffice user can read it; on conversion failure against a
    local server, the server is killed and the conversion retried once.

    @param input_file: (string) - path of the document to convert.
    @param output_file: (string) - desired output path (a temporary one
        is generated when None).
    @param output_format: (string) - target format (unoconv name;
        'txt' is mapped to unoconv's 'text').
    @param pdfopt: (bool) - when producing PDF, also linearize it.
    @return: (string) - the path of the produced file.
    @raise InvenioWebSubmitFileConverterError: when no output could be
        produced (with installation guidance in the message).
    """
    from invenio.bibdocfile import normalize_format
    ## NOTE: in case we switch back keeping LibreOffice running, uncomment
    ## the following line.
    #_register_unoconv()
    input_file, output_file, dummy = prepare_io(input_file, output_file, output_format, need_working_dir=False)
    if output_format == 'txt':
        unoconv_format = 'text'
    else:
        unoconv_format = output_format
    try:
        try:
            ## We copy the input file and we make it available to OpenOffice
            ## with the user nobody
            from invenio.bibdocfile import decompose_file
            input_format = decompose_file(input_file, skip_version=True)[2]
            fd, tmpinputfile = tempfile.mkstemp(dir=CFG_TMPDIR, suffix=normalize_format(input_format))
            os.close(fd)
            shutil.copy(input_file, tmpinputfile)
            get_file_converter_logger().debug("Prepared input file %s" % tmpinputfile)
            ## World-readable so the OpenOffice user can open it.
            os.chmod(tmpinputfile, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
            tmpoutputfile = tempfile.mktemp(dir=CFG_OPENOFFICE_TMPDIR, suffix=normalize_format(output_format))
            get_file_converter_logger().debug("Prepared output file %s" % tmpoutputfile)
            try:
                execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '--output', tmpoutputfile, '-f', unoconv_format, tmpinputfile, sudo=CFG_OPENOFFICE_USER)
            except:
                register_exception(alert_admin=True)
                raise
        except InvenioWebSubmitFileConverterError:
            ## Ok maybe OpenOffice hanged. Let's better kill it and restarted!
            if CFG_OPENOFFICE_SERVER_HOST != 'localhost':
                ## There's not that much that we can do. Let's bail out
                if not os.path.exists(tmpoutputfile) or not os.path.getsize(tmpoutputfile):
                    raise
                else:
                    ## Sometimes OpenOffice crashes but we don't care :-)
                    ## it still have created a nice file.
                    pass
            else:
                ## Local server: kill it and retry the conversion once.
                execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-k', sudo=CFG_OPENOFFICE_USER)
                ## NOTE: in case we switch back keeping LibreOffice running, uncomment
                ## the following lines.
                #_unregister_unoconv()
                #_register_unoconv()
                time.sleep(5)
                try:
                    execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '--output', tmpoutputfile, '-f', unoconv_format, tmpinputfile, sudo=CFG_OPENOFFICE_USER)
                except InvenioWebSubmitFileConverterError:
                    execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-k', sudo=CFG_OPENOFFICE_USER)
                    if not os.path.exists(tmpoutputfile) or not os.path.getsize(tmpoutputfile):
                        raise InvenioWebSubmitFileConverterError('No output was generated by OpenOffice')
                    else:
                        ## Sometimes OpenOffice crashes but we don't care :-)
                        ## it still have created a nice file.
                        pass
    except Exception, err:
        raise InvenioWebSubmitFileConverterError(get_unoconv_installation_guideline(err))
    output_format = normalize_format(output_format)
    if output_format == '.pdf' and pdfopt:
        ## Linearize the produced PDF on the way to its final location.
        pdf2pdfopt(tmpoutputfile, output_file)
    else:
        shutil.copy(tmpoutputfile, output_file)
    ## Ask the helper to remove the OpenOffice-owned temporary output,
    ## then clean up our own temporary input copy.
    execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-r', tmpoutputfile, sudo=CFG_OPENOFFICE_USER)
    os.remove(tmpinputfile)
    return output_file
def get_unoconv_installation_guideline(err):
    """
    Return the Libre/OpenOffice installation guideline (embedding the
    current error message).
    """
    from invenio.bibtask import guess_apache_process_user
    ## Values interpolated into the guideline template below.
    placeholders = {
        'err': err,
        'tmpdir': CFG_OPENOFFICE_TMPDIR,
        'nobody': CFG_OPENOFFICE_USER,
        'apache': CFG_BIBSCHED_PROCESS_USER or guess_apache_process_user(),
        'python': CFG_PATH_OPENOFFICE_PYTHON,
        'unoconv': os.path.join(CFG_BINDIR, 'inveniounoconv'),
    }
    guideline = """\
OpenOffice.org can't properly create files in the OpenOffice.org temporary
directory %(tmpdir)s, as the user %(nobody)s (as configured in
CFG_OPENOFFICE_USER invenio(-local).conf variable): %(err)s.
In your /etc/sudoers file, you should authorize the %(apache)s user to run
%(unoconv)s as %(nobody)s user as in:
%(apache)s ALL=(%(nobody)s) NOPASSWD: %(unoconv)s
You should then run the following commands:
$ sudo mkdir -p %(tmpdir)s
$ sudo chown -R %(nobody)s %(tmpdir)s
$ sudo chmod -R 755 %(tmpdir)s""" % placeholders
    return wrap_text_in_a_box(guideline)
def can_unoconv(verbose=False):
    """
    If OpenOffice.org integration is enabled, checks whether the system is
    properly configured.

    @param verbose: when True, print a diagnostic/installation guideline to
        stderr on failure.
    @return True if a test document successfully round-trips through the
        unoconv pipeline, False otherwise.
    """
    if CFG_PATH_OPENOFFICE_PYTHON and CFG_OPENOFFICE_SERVER_HOST:
        try:
            ## Round-trip a tiny text file: txt -> pdf -> txt, and check
            ## the text survived, to prove the whole pipeline works.
            test = os.path.join(CFG_TMPDIR, 'test.txt')
            open(test, 'w').write('test')
            output = unoconv(test, output_format='pdf')
            output2 = convert_file(output, output_format='.txt')
            if 'test' not in open(output2).read():
                raise Exception("Coulnd't produce a valid PDF with Libre/OpenOffice.org")
            os.remove(output2)
            os.remove(output)
            os.remove(test)
            return True
        except Exception, err:
            if verbose:
                print >> sys.stderr, get_unoconv_installation_guideline(err)
            return False
    else:
        if verbose:
            print >> sys.stderr, "Libre/OpenOffice.org integration not enabled"
        return False
def any2djvu(input_file, output_file=None, resolution=400, ocr=True, input_format=5, **dummy):
    """
    Transform input_file into a .djvu file via the any2djvu service.
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param resolution [int] the resolution of the output_file
    @param ocr [bool] whether to run OCR on the document
    @param input_format [int] [1-9]:
        1 - DjVu Document (for verification or OCR)
        2 - PS/PS.GZ/PDF Document (default)
        3 - Photo/Picture/Icon
        4 - Scanned Document - B&W - <200 dpi
        5 - Scanned Document - B&W - 200-400 dpi
        6 - Scanned Document - B&W - >400 dpi
        7 - Scanned Document - Color/Mixed - <200 dpi
        8 - Scanned Document - Color/Mixed - 200-400 dpi
        9 - Scanned Document - Color/Mixed - >400 dpi
    @return [string] output_file input_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    Note: due to the bottleneck of using a centralized server, it is very
    slow and is not suitable for interactive usage (e.g. WebSubmit functions)
    """
    from invenio.bibdocfile import decompose_file
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.djvu')
    ## The tool wants the OCR switch as a textual "1"/"0" flag.
    ocr_flag = ocr and "1" or "0"
    ## any2djvu only looks for its input in the current directory, hence the
    ## basename + cwd combination.
    execute_command(CFG_PATH_ANY2DJVU, '-a', '-c', '-r', resolution, '-o', ocr_flag, '-f', input_format, os.path.basename(input_file), cwd=working_dir)
    ## The output name is not configurable: it is derived from the input
    ## basename with a .djvu extension.
    produced_file = os.path.join(working_dir, decompose_file(input_file)[1] + '.djvu')
    shutil.move(produced_file, output_file)
    clean_working_dir(working_dir)
    return output_file
## Matches a "Title: ..." line in pdfinfo output; group(1) captures the
## title text with surrounding whitespace stripped.
_RE_FIND_TITLE = re.compile(r'^Title:\s*(.*?)\s*$')
def pdf2pdfx(input_file, output_file=None, title=None, pdfopt=False, profile="pdf/x-3:2002", **dummy):
    """
    Transform any PDF into a PDF/X (see: <http://en.wikipedia.org/wiki/PDF/X>)
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param title [string] the title of the document. None for autodiscovery.
    @param pdfopt [bool] whether to linearize the pdf, too.
    @param profile: [string] the PDFX profile to use. Supports: 'pdf/x-1a:2001', 'pdf/x-1a:2003', 'pdf/x-3:2002'
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    ## FIX: an unsupported profile previously fell through every branch and
    ## crashed later with a NameError (pdfx_version never bound). Fail fast
    ## with a clear message instead.
    if profile not in ('pdf/x-1a:2001', 'pdf/x-1a:2003', 'pdf/x-3:2002'):
        raise InvenioWebSubmitFileConverterError('Unsupported PDF/X profile: %s' % profile)
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
    if title is None:
        ## Autodiscover the title from the PDF metadata reported by pdfinfo.
        stdout = execute_command(CFG_PATH_PDFINFO, input_file)
        for line in stdout.split('\n'):
            g = _RE_FIND_TITLE.match(line)
            if g:
                title = g.group(1)
                break
    if not title:
        title = 'No title'
    get_file_converter_logger().debug("Extracted title is %s" % title)
    ## The ICC colour profile must be next to PDFX_def.ps for Ghostscript
    ## to find and embed it.
    if os.path.exists(CFG_ICC_PATH):
        shutil.copy(CFG_ICC_PATH, working_dir)
    else:
        raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?')
    ## Instantiate the PDFX definition template with the document metadata.
    pdfx_header = open(CFG_PDFX_DEF_PATH).read()
    pdfx_header = pdfx_header.replace('<<<<TITLEMARKER>>>>', title)
    icc_iso_profile_def = ''
    if profile == 'pdf/x-1a:2001':
        pdfx_version = 'PDF/X-1a:2001'
        pdfx_conformance = 'PDF/X-1a:2001'
    elif profile == 'pdf/x-1a:2003':
        pdfx_version = 'PDF/X-1a:2003'
        pdfx_conformance = 'PDF/X-1a:2003'
    elif profile == 'pdf/x-3:2002':
        ## Only PDF/X-3 references the ICC profile in the definition file.
        icc_iso_profile_def = '/ICCProfile (ISOCoatedsb.icc)'
        pdfx_version = 'PDF/X-3:2002'
        pdfx_conformance = 'PDF/X-3:2002'
    pdfx_header = pdfx_header.replace('<<<<ICCPROFILEDEF>>>>', icc_iso_profile_def)
    pdfx_header = pdfx_header.replace('<<<<GTS_PDFXVersion>>>>', pdfx_version)
    pdfx_header = pdfx_header.replace('<<<<GTS_PDFXConformance>>>>', pdfx_conformance)
    outputpdf = os.path.join(working_dir, 'output_file.pdf')
    open(os.path.join(working_dir, 'PDFX_def.ps'), 'w').write(pdfx_header)
    if profile in ['pdf/x-3:2002']:
        ## PDF/X-3 permits device-independent colour, hence -dUseCIEColor.
        execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), input_file, cwd=working_dir)
    elif profile in ['pdf/x-1a:2001', 'pdf/x-1a:2003']:
        ## PDF/X-1a requires conversion of all colour to CMYK.
        execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-sColorConversionStrategy=CMYK', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), input_file, cwd=working_dir)
    if pdfopt:
        execute_command(CFG_PATH_PDFOPT, outputpdf, output_file)
    else:
        shutil.move(outputpdf, output_file)
    clean_working_dir(working_dir)
    return output_file
def pdf2pdfa(input_file, output_file=None, title=None, pdfopt=True, **dummy):
    """
    Transform any PDF into a PDF/A (see: <http://www.pdfa.org/>)
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param title [string] the title of the document. None for autodiscovery.
    @param pdfopt [bool] whether to linearize the pdf, too.
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
    if title is None:
        ## Autodiscover the title from the PDF metadata reported by pdfinfo.
        stdout = execute_command(CFG_PATH_PDFINFO, input_file)
        for line in stdout.split('\n'):
            g = _RE_FIND_TITLE.match(line)
            if g:
                title = g.group(1)
                break
    if not title:
        title = 'No title'
    get_file_converter_logger().debug("Extracted title is %s" % title)
    ## The ICC colour profile must be next to PDFA_def.ps for Ghostscript
    ## to find and embed it.
    if os.path.exists(CFG_ICC_PATH):
        shutil.copy(CFG_ICC_PATH, working_dir)
    else:
        raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?')
    ## Instantiate the PDFA definition template with the document title.
    pdfa_header = open(CFG_PDFA_DEF_PATH).read()
    pdfa_header = pdfa_header.replace('<<<<TITLEMARKER>>>>', title)
    inputps = os.path.join(working_dir, 'input.ps')
    outputpdf = os.path.join(working_dir, 'output_file.pdf')
    open(os.path.join(working_dir, 'PDFA_def.ps'), 'w').write(pdfa_header)
    ## PDF -> level-3 PostScript -> PDF/A: Ghostscript rebuilds the document
    ## so it conforms to the PDF/A definition file.
    execute_command(CFG_PATH_PDFTOPS, '-level3', input_file, inputps)
    execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFA', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFA_def.ps'), 'input.ps', cwd=working_dir)
    if pdfopt:
        execute_command(CFG_PATH_PDFOPT, outputpdf, output_file)
    else:
        shutil.move(outputpdf, output_file)
    clean_working_dir(working_dir)
    return output_file
def pdf2pdfopt(input_file, output_file=None, **dummy):
    """
    Linearize the input PDF in order to improve the web-experience when
    visualizing the document through the web.
    @param input_file [string] the input input_file
    @param output_file [string] the output_file file name, None for temporary generated
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    ## pdfopt reads the source and writes the destination directly, so no
    ## scratch directory is required.
    prepared = prepare_io(input_file, output_file, '.pdf', need_working_dir=False)
    input_file, output_file = prepared[0], prepared[1]
    execute_command(CFG_PATH_PDFOPT, input_file, output_file)
    return output_file
def pdf2ps(input_file, output_file=None, level=2, compress=True, **dummy):
    """
    Convert from Pdf to Postscript.
    @param level [int] the PostScript language level passed to pdftops.
    @param compress [bool] whether to gzip-compress the result (.ps.gz).
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    suffix = '.ps.gz' if compress else '.ps'
    input_file, output_file, working_dir = prepare_io(input_file, output_file, suffix)
    intermediate_ps = os.path.join(working_dir, 'output.ps')
    execute_command(CFG_PATH_PDFTOPS, '-level%i' % level, input_file, intermediate_ps)
    if compress:
        ## gzip -c streams the compressed data to stdout, captured into
        ## the final output file.
        execute_command(CFG_PATH_GZIP, '-c', intermediate_ps, filename_out=output_file)
    else:
        shutil.move(intermediate_ps, output_file)
    clean_working_dir(working_dir)
    return output_file
def ps2pdfx(input_file, output_file=None, title=None, pdfopt=False, profile="pdf/x-3:2002", **dummy):
    """
    Transform any PS into a PDF/X (see: <http://en.wikipedia.org/wiki/PDF/X>)
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param title [string] the title of the document. None for autodiscovery.
    @param pdfopt [bool] whether to linearize the pdf, too.
    @param profile: [string] the PDFX profile to use. Supports: 'pdf/x-1a:2001', 'pdf/x-1a:2003', 'pdf/x-3:2002'
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    ## Consistency with pdf2pdfx: reject unknown profiles up front instead
    ## of crashing later with an unbound pdfx_version.
    if profile not in ('pdf/x-1a:2001', 'pdf/x-1a:2003', 'pdf/x-3:2002'):
        raise InvenioWebSubmitFileConverterError('Unsupported PDF/X profile: %s' % profile)
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
    ## Transparently inflate gzipped PostScript into the scratch directory.
    if input_file.endswith('.gz'):
        new_input_file = os.path.join(working_dir, 'input.ps')
        execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=new_input_file)
        input_file = new_input_file
    if not title:
        title = 'No title'
    ## Consistency with pdf2pdfx: give a helpful error when the ICC helper
    ## file was never installed, instead of a bare IOError from shutil.
    if not os.path.exists(CFG_ICC_PATH):
        raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?')
    shutil.copy(CFG_ICC_PATH, working_dir)
    pdfx_header = open(CFG_PDFX_DEF_PATH).read()
    pdfx_header = pdfx_header.replace('<<<<TITLEMARKER>>>>', title)
    icc_iso_profile_def = ''
    if profile == 'pdf/x-1a:2001':
        pdfx_version = 'PDF/X-1a:2001'
        pdfx_conformance = 'PDF/X-1a:2001'
    elif profile == 'pdf/x-1a:2003':
        pdfx_version = 'PDF/X-1a:2003'
        pdfx_conformance = 'PDF/X-1a:2003'
    elif profile == 'pdf/x-3:2002':
        icc_iso_profile_def = '/ICCProfile (ISOCoatedsb.icc)'
        pdfx_version = 'PDF/X-3:2002'
        pdfx_conformance = 'PDF/X-3:2002'
    pdfx_header = pdfx_header.replace('<<<<ICCPROFILEDEF>>>>', icc_iso_profile_def)
    pdfx_header = pdfx_header.replace('<<<<GTS_PDFXVersion>>>>', pdfx_version)
    ## FIX: this line used to replace '<<<<TITLEMARKER>>>>' a second time
    ## (a no-op, since it was already substituted above), leaving the
    ## GTS_PDFXConformance marker unexpanded in the definition file.
    pdfx_header = pdfx_header.replace('<<<<GTS_PDFXConformance>>>>', pdfx_conformance)
    outputpdf = os.path.join(working_dir, 'output_file.pdf')
    open(os.path.join(working_dir, 'PDFX_def.ps'), 'w').write(pdfx_header)
    if profile in ['pdf/x-3:2002']:
        execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), 'input.ps', cwd=working_dir)
    elif profile in ['pdf/x-1a:2001', 'pdf/x-1a:2003']:
        execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-sColorConversionStrategy=CMYK', '-dAutoRotatePages=/None', '-sDEVICE=pdfwrite', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), 'input.ps', cwd=working_dir)
    if pdfopt:
        execute_command(CFG_PATH_PDFOPT, outputpdf, output_file)
    else:
        shutil.move(outputpdf, output_file)
    clean_working_dir(working_dir)
    return output_file
def ps2pdfa(input_file, output_file=None, title=None, pdfopt=True, **dummy):
    """
    Transform any PS into a PDF/A (see: <http://www.pdfa.org/>)
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param title [string] the title of the document. None for autodiscovery.
    @param pdfopt [bool] whether to linearize the pdf, too.
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
    ## Transparently inflate gzipped PostScript into the scratch directory.
    if input_file.endswith('.gz'):
        uncompressed = os.path.join(working_dir, 'input.ps')
        execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=uncompressed)
        input_file = uncompressed
    title = title or 'No title'
    ## The ICC profile must sit next to PDFA_def.ps for Ghostscript.
    shutil.copy(CFG_ICC_PATH, working_dir)
    pdfa_def = open(CFG_PDFA_DEF_PATH).read().replace('<<<<TITLEMARKER>>>>', title)
    converted_pdf = os.path.join(working_dir, 'output_file.pdf')
    open(os.path.join(working_dir, 'PDFA_def.ps'), 'w').write(pdfa_def)
    execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFA', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFA_def.ps'), input_file, cwd=working_dir)
    if pdfopt:
        ## Linearize while copying to the final destination.
        execute_command(CFG_PATH_PDFOPT, converted_pdf, output_file)
    else:
        shutil.move(converted_pdf, output_file)
    clean_working_dir(working_dir)
    return output_file
def ps2pdf(input_file, output_file=None, pdfopt=True, **dummy):
    """
    Transform any PS into a PDF
    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param pdfopt [bool] whether to linearize the pdf, too.
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
    ## Transparently inflate gzipped PostScript into the scratch directory.
    if input_file.endswith('.gz'):
        uncompressed = os.path.join(working_dir, 'input.ps')
        execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=uncompressed)
        input_file = uncompressed
    converted_pdf = os.path.join(working_dir, 'output_file.pdf')
    execute_command(CFG_PATH_PS2PDF, input_file, converted_pdf, cwd=working_dir)
    if pdfopt:
        ## Linearize while copying to the final destination.
        execute_command(CFG_PATH_PDFOPT, converted_pdf, output_file)
    else:
        shutil.move(converted_pdf, output_file)
    clean_working_dir(working_dir)
    return output_file
def pdf2pdfhocr(input_pdf, text_hocr, output_pdf, rotations=None, font='Courier', draft=False):
    """
    Adds the OCRed text to the original pdf.
    @param input_pdf: path to the original (scanned) PDF.
    @param text_hocr: path to the hOCR file with the recognized text.
    @param output_pdf: path where the merged PDF is written.
    @param rotations: a list of angles by which pages should be rotated
    @param font: font used when rendering the hOCR text layer.
    @param draft: when True the text layer is drawn on top of the original
        page (visible, for debugging); otherwise it sits underneath as an
        invisible searchable layer.
    @return output_pdf.
    """
    def _get_page_rotation(i):
        ## Pages without a recorded rotation default to 0 degrees.
        if len(rotations) > i:
            return rotations[i]
        return 0
    if rotations is None:
        rotations = []
    ## Render the hOCR text into a standalone temporary PDF first.
    input_pdf, hocr_pdf, dummy = prepare_io(input_pdf, output_ext='.pdf', need_working_dir=False)
    create_pdf(extract_hocr(open(text_hocr).read()), hocr_pdf, font, draft)
    input1 = PdfFileReader(file(input_pdf, "rb"))
    input2 = PdfFileReader(file(hocr_pdf, "rb"))
    output = PdfFileWriter()
    ## Carry over the document metadata from the original PDF.
    info = input1.getDocumentInfo()
    if info:
        infoDict = output._info.getObject()
        infoDict.update(info)
    for i in range(0, input1.getNumPages()):
        orig_page = input1.getPage(i)
        text_page = input2.getPage(i)
        angle = _get_page_rotation(i)
        ## Rotate the text layer to match the page orientation recorded
        ## during OCR, merge, then rotate the merged page back (unless in
        ## draft mode, where the rotated view is kept for inspection).
        if angle != 0:
            print >> sys.stderr, "Rotating page %d by %d degrees." % (i, angle)
            text_page = text_page.rotateClockwise(angle)
        if draft:
            below, above = orig_page, text_page
        else:
            below, above = text_page, orig_page
        below.mergePage(above)
        if angle != 0 and not draft:
            print >> sys.stderr, "Rotating back page %d by %d degrees." % (i, angle)
            below.rotateCounterClockwise(angle)
        output.addPage(below)
    outputStream = file(output_pdf, "wb")
    output.write(outputStream)
    outputStream.close()
    ## The temporary hOCR-only PDF is no longer needed.
    os.remove(hocr_pdf)
    return output_pdf
def pdf2hocr2pdf(input_file, output_file=None, ln='en', return_working_dir=False, extract_only_text=False, pdfopt=True, font='Courier', draft=False, **dummy):
    """
    Return the text content in input_file.

    OCR pipeline: each PDF page is rasterized with pdftoppm, rotated/
    deskewed, recognized with OCROpus (ocroscript), and the results are
    either concatenated as plain text (extract_only_text=True) or merged
    back into a searchable PDF via pdf2pdfhocr.

    @param ln is a two letter language code to give the OCR tool a hint.
    @param return_working_dir if set to True, will return output_file path and the working_dir path, instead of deleting the working_dir. This is useful in case you need the intermediate images to build again a PDF.
    ## NOTE(review): return_working_dir is accepted but never acted upon in
    ## this implementation — the working_dir is always cleaned; confirm.
    raise InvenioWebSubmitFileConverterError if OCROpus is not available.
    """
    def _perform_rotate(working_dir, imagefile, angle):
        """Rotate imagefile of the corresponding angle. Creates a new file
        with rotated.ppm."""
        get_file_converter_logger().debug('Performing rotate on %s by %s degrees' % (imagefile, angle))
        if not angle:
            ## Zero rotation: just copy the page image as rotated.ppm.
            #execute_command('%s %s %s', CFG_PATH_CONVERT, os.path.join(working_dir, imagefile), os.path.join(working_dir, 'rotated-%s' % imagefile))
            shutil.copy(os.path.join(working_dir, imagefile), os.path.join(working_dir, 'rotated.ppm'))
        else:
            execute_command(CFG_PATH_CONVERT, os.path.join(working_dir, imagefile), '-rotate', str(angle), '-depth', str(8), os.path.join(working_dir, 'rotated.ppm'))
        return True
    def _perform_deskew(working_dir):
        """Perform ocroscript deskew. Expect to work on rotated-imagefile.
        Creates deskewed.ppm.
        Return True if deskewing was fine."""
        get_file_converter_logger().debug('Performing deskew')
        try:
            ## Any output on stderr is treated as a deskew failure.
            dummy, stderr = execute_command_with_stderr(CFG_PATH_OCROSCRIPT, os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'deskew.lua'), os.path.join(working_dir, 'rotated.ppm'), os.path.join(working_dir, 'deskewed.ppm'))
            if stderr.strip():
                get_file_converter_logger().debug('Errors found during deskewing')
                return False
            else:
                return True
        except InvenioWebSubmitFileConverterError, err:
            get_file_converter_logger().debug('Deskewing error: %s' % err)
            return False
    def _perform_recognize(working_dir):
        """Perform ocroscript recognize. Expect to work on deskewed.ppm.
        Creates recognized.out Return True if recognizing was fine."""
        get_file_converter_logger().debug('Performing recognize')
        if extract_only_text:
            output_mode = 'text'
        else:
            output_mode = 'hocr'
        try:
            dummy, stderr = execute_command_with_stderr(CFG_PATH_OCROSCRIPT, 'recognize', '--tesslanguage=%s' % ln, '--output-mode=%s' % output_mode, os.path.join(working_dir, 'deskewed.ppm'), filename_out=os.path.join(working_dir, 'recognize.out'))
            if stderr.strip():
                ## There was some output on stderr
                get_file_converter_logger().debug('Errors found in recognize.err')
                return False
            ## Even a "successful" run may produce garbage; sanity-check it.
            return not guess_ocropus_produced_garbage(os.path.join(working_dir, 'recognize.out'), not extract_only_text)
        except InvenioWebSubmitFileConverterError, err:
            get_file_converter_logger().debug('Recognizer error: %s' % err)
            return False
    def _perform_dummy_recognize(working_dir):
        """Return an empty text or an empty hocr referencing the image."""
        ## Used as a fallback when OCR failed at every rotation angle, so
        ## the page is still represented in the output.
        get_file_converter_logger().debug('Performing dummy recognize')
        if extract_only_text:
            out = ''
        else:
            out = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta content="ocr_line ocr_page" name="ocr-capabilities"/><meta content="en" name="ocr-langs"/><meta content="Latin" name="ocr-scripts"/><meta content="" name="ocr-microformats"/><title>OCR Output</title></head>
<body><div class="ocr_page" title="bbox 0 0 1 1; image deskewed.ppm">
</div></body></html>"""
        open(os.path.join(working_dir, 'recognize.out'), 'w').write(out)
    def _find_image_file(working_dir, imageprefix, page):
        ## pdftoppm zero-pads the page number in the produced file name
        ## depending on the total page count; probe the possible widths.
        ret = '%s-%d.ppm' % (imageprefix, page)
        if os.path.exists(os.path.join(working_dir, ret)):
            return ret
        ret = '%s-%02d.ppm' % (imageprefix, page)
        if os.path.exists(os.path.join(working_dir, ret)):
            return ret
        ret = '%s-%03d.ppm' % (imageprefix, page)
        if os.path.exists(os.path.join(working_dir, ret)):
            return ret
        ret = '%s-%04d.ppm' % (imageprefix, page)
        if os.path.exists(os.path.join(working_dir, ret)):
            return ret
        ret = '%s-%05d.ppm' % (imageprefix, page)
        if os.path.exists(os.path.join(working_dir, ret)):
            return ret
        ret = '%s-%06d.ppm' % (imageprefix, page)
        if os.path.exists(os.path.join(working_dir, ret)):
            return ret
        ## I guess we won't have documents with more than million pages
        return None
    def _ocr(tmp_output_file):
        """
        Append to tmp_output_file the partial results of OCROpus recognize.
        Return a list of rotations.
        """
        page = 0
        rotations = []
        while True:
            page += 1
            get_file_converter_logger().debug('Page %d.' % page)
            ## Rasterize one page at a time at CFG_PPM_RESOLUTION dpi.
            execute_command(CFG_PATH_PDFTOPPM, '-f', str(page), '-l', str(page), '-r', str(CFG_PPM_RESOLUTION), '-aa', 'yes', '-freetype', 'yes', input_file, os.path.join(working_dir, 'image'))
            imagefile = _find_image_file(working_dir, 'image', page)
            if imagefile == None:
                ## No image produced: we are past the last page.
                break
            ## Try the four orientations until one yields usable OCR.
            for angle in (0, 180, 90, 270):
                get_file_converter_logger().debug('Trying %d degrees...' % angle)
                if _perform_rotate(working_dir, imagefile, angle) and _perform_deskew(working_dir) and _perform_recognize(working_dir):
                    rotations.append(angle)
                    break
            else:
                ## All angles failed: emit an empty placeholder page.
                get_file_converter_logger().debug('Dummy recognize')
                rotations.append(0)
                _perform_dummy_recognize(working_dir)
            open(tmp_output_file, 'a').write(open(os.path.join(working_dir, 'recognize.out')).read())
            # clean
            os.remove(os.path.join(working_dir, imagefile))
        return rotations
    if CFG_PATH_OCROSCRIPT:
        ## Tesseract wants three-letter language codes (en -> eng).
        if len(ln) == 2:
            ln = CFG_TWO2THREE_LANG_CODES.get(ln, 'eng')
        if extract_only_text:
            input_file, output_file, working_dir = prepare_io(input_file, output_file, output_ext='.txt')
            _ocr(output_file)
        else:
            input_file, tmp_output_hocr, working_dir = prepare_io(input_file, output_ext='.hocr')
            rotations = _ocr(tmp_output_hocr)
            if pdfopt:
                input_file, tmp_output_pdf, dummy = prepare_io(input_file, output_ext='.pdf', need_working_dir=False)
                tmp_output_pdf, output_file, dummy = prepare_io(tmp_output_pdf, output_file, output_ext='.pdf', need_working_dir=False)
                pdf2pdfhocr(input_file, tmp_output_hocr, tmp_output_pdf, rotations=rotations, font=font, draft=draft)
                pdf2pdfopt(tmp_output_pdf, output_file)
                os.remove(tmp_output_pdf)
            else:
                input_file, output_file, dummy = prepare_io(input_file, output_file, output_ext='.pdf', need_working_dir=False)
                pdf2pdfhocr(input_file, tmp_output_hocr, output_file, rotations=rotations, font=font, draft=draft)
        clean_working_dir(working_dir)
        return output_file
    else:
        raise InvenioWebSubmitFileConverterError("It's impossible to generate HOCR output from PDF. OCROpus is not available.")
def pdf2text(input_file, output_file=None, perform_ocr=True, ln='en', **dummy):
    """
    Return the text content in input_file.
    @param perform_ocr [bool] additionally OCR the document (when the OCR
        toolchain is available) and append the recognized text.
    @param ln [string] two-letter language hint for the OCR engine.
    """
    input_file, output_file, dummy = prepare_io(input_file, output_file, '.txt', need_working_dir=False)
    execute_command(CFG_PATH_PDFTOTEXT, '-enc', 'UTF-8', '-eol', 'unix', '-nopgbrk', input_file, output_file)
    if perform_ocr and can_perform_ocr():
        ocred_output = pdf2hocr2pdf(input_file, ln=ln, extract_only_text=True)
        try:
            ## Append the OCRed text after the embedded-text extraction.
            destination = open(output_file, 'a')
            for line in open(ocred_output):
                destination.write(line)
            destination.close()
        finally:
            silent_remove(ocred_output)
    return output_file
def txt2text(input_file, output_file=None, **dummy):
    """
    Return the text content in input_file
    """
    ## Identity conversion: the input already is plain text, so a copy to
    ## the prepared destination is all that is needed.
    prepared = prepare_io(input_file, output_file, '.txt', need_working_dir=False)
    shutil.copy(prepared[0], prepared[1])
    return prepared[1]
def html2text(input_file, output_file=None, **dummy):
    """
    Return the text content of an HTML/XML file.
    """
    class HTMLStripper(HTMLParser.HTMLParser):
        ## Streaming parser that writes only the textual content of the
        ## document to output_file, dropping all markup.
        def __init__(self, output_file):
            HTMLParser.HTMLParser.__init__(self)
            self.output_file = output_file
        def handle_entityref(self, name):
            ## Expand known named entities (e.g. &amp;) to UTF-8 text;
            ## unknown entities are dropped.
            if name in entitydefs:
                self.output_file.write(entitydefs[name].decode('latin1').encode('utf8'))
        def handle_data(self, data):
            ## Collapse whitespace runs in text nodes to single spaces;
            ## whitespace-only nodes are skipped entirely.
            if data.strip():
                self.output_file.write(_RE_CLEAN_SPACES.sub(' ', data))
        def handle_charref(self, data):
            ## Decimal numeric character references (&#NNN;); anything
            ## int() cannot parse (e.g. the hex form) is silently dropped.
            try:
                self.output_file.write(unichr(int(data)).encode('utf8'))
            except:
                pass
        def close(self):
            self.output_file.close()
            HTMLParser.HTMLParser.close(self)
    input_file, output_file, dummy = prepare_io(input_file, output_file, '.txt', need_working_dir=False)
    html_stripper = HTMLStripper(open(output_file, 'w'))
    for line in open(input_file):
        html_stripper.feed(line)
    html_stripper.close()
    return output_file
def djvu2text(input_file, output_file=None, **dummy):
    """
    Return the text content in input_file.
    """
    ## djvutxt extracts the (possibly OCRed) text layer of the document.
    prepared = prepare_io(input_file, output_file, '.txt', need_working_dir=False)
    execute_command(CFG_PATH_DJVUTXT, prepared[0], prepared[1])
    return prepared[1]
def djvu2ps(input_file, output_file=None, level=2, compress=True, **dummy):
    """
    Convert a djvu into a .ps[.gz]
    @param level [int] PostScript language level for the uncompressed output.
    @param compress [bool] whether to gzip-compress the result (.ps.gz).
    @return [string] output_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    ## NOTE(review): only the uncompressed branch honours 'level'; the
    ## compressed branch calls djvups with its defaults — confirm intended.
    if compress:
        input_file, output_file, working_dir = prepare_io(input_file, output_file, output_ext='.ps.gz')
        try:
            execute_command(CFG_PATH_DJVUPS, input_file, os.path.join(working_dir, 'output.ps'))
            execute_command(CFG_PATH_GZIP, '-c', os.path.join(working_dir, 'output.ps'), filename_out=output_file)
        finally:
            clean_working_dir(working_dir)
    else:
        ## FIX: prepare_io used to be called inside the try block, so a
        ## failure there reached the finally clause before working_dir was
        ## bound and raised a spurious NameError masking the real error.
        input_file, output_file, working_dir = prepare_io(input_file, output_file, output_ext='.ps')
        try:
            execute_command(CFG_PATH_DJVUPS, '-level=%i' % level, input_file, output_file)
        finally:
            clean_working_dir(working_dir)
    return output_file
def tiff2pdf(input_file, output_file=None, pdfopt=True, pdfa=True, perform_ocr=True, **args):
    """
    Convert a .tiff into a .pdf
    @param pdfopt [bool] whether to linearize the resulting PDF.
    @param pdfa [bool] whether to produce a PDF/A.
    @param perform_ocr [bool] whether to OCR the scanned image into the PDF.
    @return [string] output_file.
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    if pdfa or pdfopt or perform_ocr:
        input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
        try:
            ## First get a plain PDF out of the TIFF, then post-process it.
            partial_output = os.path.join(working_dir, 'output.pdf')
            execute_command(CFG_PATH_TIFF2PDF, '-o', partial_output, input_file)
            if perform_ocr:
                pdf2hocr2pdf(partial_output, output_file, pdfopt=pdfopt, **args)
            elif pdfa:
                pdf2pdfa(partial_output, output_file, pdfopt=pdfopt, **args)
            else:
                ## FIX: this branch used to call pdfopt(...), i.e. the local
                ## boolean parameter, which raised TypeError whenever it was
                ## reached; the intended call is the pdf2pdfopt() helper.
                pdf2pdfopt(partial_output, output_file)
        finally:
            clean_working_dir(working_dir)
    else:
        ## No post-processing requested: convert straight to destination.
        input_file, output_file, dummy = prepare_io(input_file, output_file, '.pdf', need_working_dir=False)
        execute_command(CFG_PATH_TIFF2PDF, '-o', output_file, input_file)
    return output_file
def pstotext(input_file, output_file=None, **dummy):
    """
    Convert a .ps[.gz] into text.
    """
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.txt')
    try:
        ## Transparently inflate gzipped PostScript before extraction.
        if input_file.endswith('.gz'):
            uncompressed = os.path.join(working_dir, 'input.ps')
            execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=uncompressed)
            input_file = uncompressed
        execute_command(CFG_PATH_PSTOTEXT, '-output', output_file, input_file)
    finally:
        clean_working_dir(working_dir)
    return output_file
def gzip(input_file, output_file=None, **dummy):
    """
    Compress a file.
    """
    ## Stream gzip -c output directly into the prepared destination file.
    prepared = prepare_io(input_file, output_file, '.gz', need_working_dir=False)
    execute_command(CFG_PATH_GZIP, '-c', prepared[0], filename_out=prepared[1])
    return prepared[1]
def gunzip(input_file, output_file=None, **dummy):
    """
    Uncompress a file.
    """
    from invenio.bibdocfile import decompose_file
    extension = decompose_file(input_file, skip_version=True)[2]
    ## Strip the trailing '.gz' to recover the real extension; when the
    ## input does not look gzipped, let prepare_io pick a default one.
    if extension.endswith('.gz'):
        target_ext = extension[:-len('.gz')]
    else:
        target_ext = None
    input_file, output_file, dummy = prepare_io(input_file, output_file, target_ext, need_working_dir=False)
    execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=output_file)
    return output_file
def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True):
    """Clean input_file and the output_file.

    Normalize the I/O of a conversion step:
    - pick the output path: a fresh temporary file when output_file is
      None, otherwise the given path (any pre-existing file there is
      removed first);
    - optionally create a scratch working directory and copy the input
      there as 'input<ext>', so converters can rely on predictable names;
    - return absolute paths.

    @param output_ext: desired output extension; when None it is derived
        from output_file, falling back to '.tmp'.
    @param need_working_dir: when False no scratch directory is created
        and the third element of the returned tuple is None.
    @return (input_file, output_file, working_dir).
    raise InvenioWebSubmitFileConverterError if the temporary file or
        directory cannot be created.
    """
    from invenio.bibdocfile import decompose_file, normalize_format
    output_ext = normalize_format(output_ext)
    get_file_converter_logger().debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext))
    if output_ext is None:
        if output_file is None:
            output_ext = '.tmp'
        else:
            output_ext = decompose_file(output_file, skip_version=True)[2]
    if output_file is None:
        try:
            (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR)
            os.close(fd)
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err)
    else:
        output_file = os.path.abspath(output_file)
        ## Converters expect to create the output themselves.
        if os.path.exists(output_file):
            os.remove(output_file)
    if need_working_dir:
        try:
            working_dir = tempfile.mkdtemp(dir=CFG_TMPDIR, prefix='conversion')
        except IOError, err:
            raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary directory: %s" % err)
        ## Copy the input into the scratch directory under the canonical
        ## name 'input<ext>' that several converters rely on.
        input_ext = decompose_file(input_file, skip_version=True)[2]
        new_input_file = os.path.join(working_dir, 'input' + input_ext)
        shutil.copy(input_file, new_input_file)
        input_file = new_input_file
    else:
        working_dir = None
        input_file = os.path.abspath(input_file)
    get_file_converter_logger().debug('IO prepared: input_file=%s, output_file=%s, working_dir=%s' % (input_file, output_file, working_dir))
    return (input_file, output_file, working_dir)
def clean_working_dir(working_dir):
    """
    Remove the working_dir.
    """
    ## Dispose of a scratch directory created by prepare_io, together with
    ## everything it still contains.
    logger = get_file_converter_logger()
    logger.debug('Cleaning working_dir: %s' % working_dir)
    shutil.rmtree(working_dir)
def execute_command(*args, **argd):
    """Wrapper to run_process_with_timeout.

    Recognized keyword arguments: cwd, filename_out, filename_err, sudo.
    @return the standard output of the command.
    raise InvenioWebSubmitFileConverterError if the exit code is non-zero.
    """
    logger = get_file_converter_logger()
    logger.debug("Executing: %s" % (args, ))
    ## Everything is stringified so callers may pass e.g. integers.
    command = [str(arg) for arg in args]
    res, stdout, stderr = run_process_with_timeout(command, cwd=argd.get('cwd'), filename_out=argd.get('filename_out'), filename_err=argd.get('filename_err'), sudo=argd.get('sudo'))
    logger.debug('res: %s, stdout: %s, stderr: %s' % (res, stdout, stderr))
    if res != 0:
        message = "ERROR: Error in running %s\n stdout:\n%s\nstderr:\n%s\n" % (command, stdout, stderr)
        logger.error(message)
        raise InvenioWebSubmitFileConverterError(message)
    return stdout
def execute_command_with_stderr(*args, **argd):
    """Wrapper to run_process_with_timeout that also returns the command's
    standard error.

    Recognized keyword arguments: cwd, filename_out, sudo.
    @return (stdout, stderr) of the successfully executed command.
    raise InvenioWebSubmitFileConverterError if the exit code is non-zero.
    """
    get_file_converter_logger().debug("Executing: %s" % (args, ))
    ## FIX: stringify the arguments, as execute_command() already does, so
    ## callers may pass e.g. integers without breaking the subprocess call.
    args = [str(arg) for arg in args]
    res, stdout, stderr = run_process_with_timeout(args, cwd=argd.get('cwd'), filename_out=argd.get('filename_out'), sudo=argd.get('sudo'))
    if res != 0:
        message = "ERROR: Error in running %s\n stdout:\n%s\nstderr:\n%s\n" % (args, stdout, stderr)
        get_file_converter_logger().error(message)
        raise InvenioWebSubmitFileConverterError(message)
    return stdout, stderr
def silent_remove(path):
    """Remove without errors a path."""
    ## Best-effort delete: a missing file is a no-op, and any OS-level
    ## failure (permissions, races) is deliberately swallowed.
    if not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError:
        pass
## Module-level cache of the supported conversion paths, computed once at
## import time and consulted by the CLI below.
__CONVERSION_MAP = get_conversion_map()
def main_cli():
"""
main function when the library behaves as a normal CLI tool.
"""
from invenio.bibdocfile import normalize_format
parser = OptionParser()
parser.add_option("-c", "--convert", dest="input_name",
help="convert the specified FILE", metavar="FILE")
parser.add_option("-d", "--debug", dest="debug", action="store_true", help="Enable debug information")
parser.add_option("--special-pdf2hocr2pdf", dest="ocrize", help="convert the given scanned PDF into a PDF with OCRed text", metavar="FILE")
parser.add_option("-f", "--format", dest="output_format", help="the desired output format", metavar="FORMAT")
parser.add_option("-o", "--output", dest="output_name", help="the desired output FILE (if not specified a new file will be generated with the desired output format)")
parser.add_option("--without-pdfa", action="store_false", dest="pdf_a", default=True, help="don't force creation of PDF/A PDFs")
parser.add_option("--without-pdfopt", action="store_false", dest="pdfopt", default=True, help="don't force optimization of PDFs files")
parser.add_option("--without-ocr", action="store_false", dest="ocr", default=True, help="don't force OCR")
parser.add_option("--can-convert", dest="can_convert", help="display all the possible format that is possible to generate from the given format", metavar="FORMAT")
parser.add_option("--is-ocr-needed", dest="check_ocr_is_needed", help="check if OCR is needed for the FILE specified", metavar="FILE")
parser.add_option("-t", "--title", dest="title", help="specify the title (used when creating PDFs)", metavar="TITLE")
parser.add_option("-l", "--language", dest="ln", help="specify the language (used when performing OCR, e.g. en, it, fr...)", metavar="LN", default='en')
(options, dummy) = parser.parse_args()
if options.debug:
from logging import basicConfig
basicConfig()
get_file_converter_logger().setLevel(DEBUG)
if options.can_convert:
if options.can_convert:
input_format = normalize_format(options.can_convert)
if input_format == '.pdf':
if can_pdfopt(True):
print "PDF linearization supported"
else:
print "No PDF linearization support"
if can_pdfa(True):
print "PDF/A generation supported"
else:
print "No PDF/A generation support"
if can_perform_ocr(True):
print "OCR supported"
else:
print "OCR not supported"
print 'Can convert from "%s" to:' % input_format[1:],
for output_format in __CONVERSION_MAP:
if can_convert(input_format, output_format):
print '"%s"' % output_format[1:],
print
elif options.check_ocr_is_needed:
print "Checking if OCR is needed on %s..." % options.check_ocr_is_needed,
sys.stdout.flush()
if guess_is_OCR_needed(options.check_ocr_is_needed):
print "needed."
else:
print "not needed."
elif options.ocrize:
try:
output = pdf2hocr2pdf(options.ocrize, output_file=options.output_name, title=options.title, ln=options.ln)
print "Output stored in %s" % output
except InvenioWebSubmitFileConverterError, err:
print "ERROR: %s" % err
sys.exit(1)
else:
try:
if not options.output_name and not options.output_format:
parser.error("Either --format, --output should be specified")
if not options.input_name:
parser.error("An input should be specified!")
output = convert_file(options.input_name, output_file=options.output_name, output_format=options.output_format, pdfopt=options.pdfopt, pdfa=options.pdf_a, title=options.title, ln=options.ln)
print "Output stored in %s" % output
except InvenioWebSubmitFileConverterError, err:
print "ERROR: %s" % err
sys.exit(1)
## Script entry point: run the command-line interface only when this module
## is executed directly (not when imported as a library).
if __name__ == "__main__":
    main_cli()
diff --git a/invenio/legacy/websubmit/file_stamper.py b/invenio/legacy/websubmit/file_stamper.py
index 0edf30c44..de2f6b2c9 100644
--- a/invenio/legacy/websubmit/file_stamper.py
+++ b/invenio/legacy/websubmit/file_stamper.py
@@ -1,1601 +1,1601 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""This is websubmit_file_stamper.py
This tool is used to create a stamped version of a PDF file.
+ Python API:
Please see stamp_file().
+ CLI API:
$ python ~invenio/lib/python/invenio/websubmit_file_stamper.py \\
--latex-template=demo-stamp-left.tex \\
--latex-template-var='REPORTNUMBER=TEST-THESIS-2008-019' \\
--latex-template-var='DATE=27/02/2008' \\
--stamp='first' \\
--layer='background' \\
--output-file=testfile_stamped.pdf \\
testfile.pdf
"""
__revision__ = "$Id$"
import getopt, sys, re, os, time, shutil, tempfile
from invenio.config import \
CFG_PATH_PS2PDF, \
CFG_PATH_GFILE,\
CFG_PATH_PDFLATEX
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_TMPDIR
from invenio.config import CFG_ETCDIR
CFG_WEBSUBMIT_FILE_STAMPER_TEMPLATES_DIR = \
"%s/websubmit/file_stamper_templates" % CFG_ETCDIR
from invenio.config import CFG_PATH_PDFTK
from invenio.config import CFG_PATH_PDF2PS
from invenio.shellutils import escape_shell_arg
from invenio.legacy.websubmit.config import InvenioWebSubmitFileStamperError
## ***** Functions related to the creation of the PDF Stamp file: *****
re_latex_includegraphics = re.compile('\\includegraphics\[.*?\]\{(?P<image>.*?)\}')
def copy_template_files_to_stampdir(path_workingdir, latex_template):
    """In order to stamp a PDF fulltext file, LaTeX is used to create a
       "stamp" page that is then merged with the fulltext PDF.
       The stamp page is created in a temporary stamp "working directory".
       This means that the LaTeX file and its image files must be copied
       locally into this working directory. This function handles
       copying them into the working directory.

       Note: Copying of the LaTeX template and its included image files is
       fairly naive and assumes that it is a very basic LaTeX file
       consisting of a main file and any included graphics.
       No other included file items will be copied.

       Also note that the order of searching for the LaTeX file and
       its associated graphics is as follows:
         + If the templatename provided has a path attached to it,
           look here first;
         + If there is no path, look in the current dir.
         + If there is no template in the current dir, look in
           ~invenio/etc/websubmit/latex
         + Images included within the LaTeX file are sought in the
           same way. Full path is used if provided; if not, current
           dir and failing that ~invenio/etc/websubmit/latex.

       @param path_workingdir: (string) - the working directory into which the
        latex templates should be copied.
       @param latex_template: (string) - the name of the LaTeX template to copy
        to the working dir.
       @return: (string) - the basename of the template, to be used to create
        the PDF stamp file.
       @raise InvenioWebSubmitFileStamperError: if the template or one of its
        included images cannot be found or copied.
    """
    ## Get the "base name" of the latex template:
    (template_path, template_name) = os.path.split(latex_template)
    if template_path != "":
        ## A full path to the template was provided. We look for it there.
        ## Test to see whether the template is a real file and is readable:
        if os.access("%s/%s" % (template_path, template_name), os.R_OK):
            ## Template is readable. Copy it locally to the working directory:
            try:
                shutil.copyfile("%s/%s" % (template_path, template_name), \
                                "%s/%s" % (path_workingdir, template_name))
            except IOError:
                ## Unable to copy the LaTeX template file to the
                ## working directory:
                msg = """Error: Unable to copy LaTeX file [%s/%s] to """ \
                      """working directory for stamping [%s].""" \
                      % (template_path, template_name, path_workingdir)
                raise InvenioWebSubmitFileStamperError(msg)
        else:
            ## Unable to read the template file:
            msg = """Error: Unable to copy LaTeX file [%s/%s] to """ \
                  """working directory for stamping [%s]. (File not """ \
                  """readable.)""" \
                  % (template_path, template_name, path_workingdir)
            raise InvenioWebSubmitFileStamperError(msg)
    else:
        ## There is no path to the template file.
        ## Look for it first in the current working directory, then in
        ## ~invenio/websubmit/latex;
        ## If not found in either, give up.
        if os.access("%s" % (template_name), os.F_OK):
            ## Template has been found in the current working directory.
            ## Copy it locally to the stamping working directory:
            try:
                shutil.copyfile("%s" % (template_name), \
                                "%s/%s" % (path_workingdir, template_name))
            except IOError:
                ## Unable to copy the LaTeX template file to the
                ## working stamping directory:
                msg = """Error: Unable to copy LaTeX file [%s] to """ \
                      """working directory for stamping [%s].""" \
                      % (template_name, path_workingdir)
                raise InvenioWebSubmitFileStamperError(msg)
        elif os.access("%s/%s" % (CFG_WEBSUBMIT_FILE_STAMPER_TEMPLATES_DIR, \
                                  template_name), os.F_OK):
            ## The template has been found in WebSubmit's latex templates
            ## directory. Copy it locally to the stamping working directory:
            try:
                shutil.copyfile("%s/%s" \
                                % (CFG_WEBSUBMIT_FILE_STAMPER_TEMPLATES_DIR, \
                                   template_name), \
                                "%s/%s" % (path_workingdir, template_name))
            except IOError:
                ## Unable to copy the LaTeX template file to the
                ## working stamping directory:
                msg = """Error: Unable to copy LaTeX file [%s/%s] to """ \
                      """working directory for stamping [%s].""" \
                      % (CFG_WEBSUBMIT_FILE_STAMPER_TEMPLATES_DIR, \
                         template_name, path_workingdir)
                raise InvenioWebSubmitFileStamperError(msg)
            else:
                ## Now that the template has been found, set the "template
                ## path" to the WebSubmit latex templates directory:
                template_path = CFG_WEBSUBMIT_FILE_STAMPER_TEMPLATES_DIR
        else:
            ## Unable to locate the latex template.
            msg = """Error: Unable to locate LaTeX file [%s].""" % template_name
            raise InvenioWebSubmitFileStamperError(msg)

    ## Now that the LaTeX template file has been copied locally, extract
    ## the names of graphics files to be included in the resulting
    ## document and attempt to copy them to the working "stamp" directory:
    ## FIX: use open() in a "with" block instead of the deprecated,
    ## Python-2-only file() builtin, so the handle is closed even if
    ## read() raises.
    with open(os.path.join(path_workingdir, template_name)) as template_desc:
        template_code = template_desc.read()
    graphic_names = [match_obj.group('image') for match_obj in \
                     re_latex_includegraphics.finditer(template_code)]

    ## Copy each include-graphic extracted from the template
    ## into the working stamp directory:
    for graphic in graphic_names:
        ## Remove any leading/trailing whitespace:
        graphic = graphic.strip()
        ## Get the path and "base name" of the included graphic:
        (graphic_path, graphic_name) = os.path.split(graphic)

        ## If there is a graphic_name to work with, try copy the file:
        if graphic_name != "":
            if graphic_path != "":
                ## The graphic is included from an absolute path:
                if os.access("%s/%s" % (graphic_path, graphic_name), os.F_OK):
                    try:
                        shutil.copyfile("%s/%s" % (graphic_path, \
                                                   graphic_name), \
                                        "%s/%s" % (path_workingdir, \
                                                   graphic_name))
                    except IOError:
                        ## Unable to copy the graphic file to
                        ## the working directory
                        msg = """Unable to stamp file. There was """ \
                              """a problem when trying to copy an image """ \
                              """[%s/%s] included by the LaTeX template""" \
                              """ [%s].""" \
                              % (graphic_path, graphic_name, template_name)
                        raise InvenioWebSubmitFileStamperError(msg)
                else:
                    msg = """Unable to locate an image [%s/%s] included""" \
                          """ by the LaTeX template file [%s].""" \
                          % (graphic_path, graphic_name, template_name)
                    raise InvenioWebSubmitFileStamperError(msg)
            else:
                ## The graphic is included from a relative path. Try to obtain
                ## it from the same directory that the latex template file was
                ## taken from:
                if template_path != "":
                    ## Since template path is not empty, try to get the images
                    ## from that location:
                    if os.access("%s/%s" % (template_path, graphic_name), \
                                 os.F_OK):
                        try:
                            shutil.copyfile("%s/%s" % (template_path, \
                                                       graphic_name), \
                                            "%s/%s" % (path_workingdir, \
                                                       graphic_name))
                        except IOError:
                            ## Unable to copy the graphic file to
                            ## the working directory
                            msg = """Unable to stamp file. There was """ \
                                  """a problem when trying to copy images """ \
                                  """included by the LaTeX template."""
                            raise InvenioWebSubmitFileStamperError(msg)
                    else:
                        msg = """Unable to locate an image [%s] included""" \
                              """ by the LaTeX template file [%s].""" \
                              % (graphic_name, template_name)
                        raise InvenioWebSubmitFileStamperError(msg)
                else:
                    ## There is no template path. Try to get the images from
                    ## current dir:
                    if os.access("%s" % graphic_name, os.F_OK):
                        try:
                            shutil.copyfile("%s" % graphic_name, \
                                            "%s/%s" % (path_workingdir, \
                                                       graphic_name))
                        except IOError:
                            ## Unable to copy the graphic file to
                            ## the working directory
                            msg = """Unable to stamp file. There was """ \
                                  """a problem when trying to copy images """ \
                                  """included by the LaTeX template."""
                            raise InvenioWebSubmitFileStamperError(msg)
                    else:
                        msg = """Unable to locate an image [%s] included""" \
                              """ by the LaTeX template file [%s].""" \
                              % (graphic_name, template_name)
                        raise InvenioWebSubmitFileStamperError(msg)
    ## Return the basename of the template so that it can be used to create
    ## the PDF stamp file:
    return template_name
def create_final_latex_template(working_dirname, \
                                latex_template, \
                                latex_template_var):
    """In the working directory, create a copy of the original
       latex template with all the possible XXX-SEARCHWORD-XXX markers
       in the template replaced with the values identified by the
       keywords in the latex_template_var dictionary.

       @param working_dirname: (string) the working directory used for the
        creation of the PDF stamp file.
       @param latex_template: (string) name of the latex template before it
        has been parsed for replacements.
       @param latex_template_var: (dict) dictionary whose keys are the
        strings to replace in latex_template and values are the replacement
        content.
       @return: (string) name of the final latex template (after
        replacements), i.e. "create" + latex_template.
       @raise InvenioWebSubmitFileStamperError: if the template cannot be
        read or the parsed copy cannot be written.
    """
    ## Regexp used for finding a substitution marker in the original template:
    re_replacement = re.compile("""XXX-(.+?)-XXX""")

    def _substitute(match_obj):
        """Return the replacement text for one XXX-SEARCHWORD-XXX marker."""
        search_term = match_obj.group(1)
        try:
            ## Get the replacement-term for this match from the dictionary:
            replacement_term = latex_template_var[search_term]
        except KeyError:
            ## This search-term was not in the list of replacements to be
            ## made. It is replaced with an empty string in the template:
            return ""
        ## A value of the form date(FORMAT) is expanded to today's date
        ## rendered with the (quoted) FORMAT string; include(FILE) inserts
        ## FILE literally; anything else is inserted as-is:
        if replacement_term.find("date(") == 0 \
               and replacement_term[-1] == ")":
            date_format = replacement_term[5:-1].strip('\'"')
            try:
                return time.strftime(date_format, time.localtime())
            except TypeError:
                ## Bad date format
                return ""
        elif replacement_term.find("include(") == 0 \
               and replacement_term[-1] == ")":
            return replacement_term[8:-1].strip('\'"')
        ## Take replacement_term as a literal string:
        return replacement_term

    ## Read the local copy of the template line by line, expanding every
    ## XXX-SEARCHWORD-XXX marker, and write the result to a new file.
    ## FIX: the original applied finditer() match offsets (computed on the
    ## unmodified line) to a line that was being rewritten in place, so a
    ## second marker on the same line was corrupted whenever an earlier
    ## replacement's length differed from its marker's length. re.sub()
    ## performs all substitutions of a line in one pass and cannot suffer
    ## from shifted offsets.
    try:
        ## Open the original latex template for reading:
        fpread = open("%s/%s" \
                      % (working_dirname, latex_template), "r")
        ## Open a file to contain the "parsed" latex template:
        fpwrite = open("%s/create%s" \
                       % (working_dirname, latex_template), "w")
        for line in fpread.readlines():
            fpwrite.write(re_replacement.sub(_substitute, line))
        fpwrite.flush()
        fpread.close()
        fpwrite.close()
    except IOError:
        msg = "Unable to read LaTeX template [%s/%s]. Cannot Stamp File" \
              % (working_dirname, latex_template)
        raise InvenioWebSubmitFileStamperError(msg)
    ## Return the name of the LaTeX template to be used:
    return "create%s" % latex_template
def escape_latex_meta_characters(text):
    """Return ``text`` with every LaTeX meta character escaped.

       The following LaTeX meta characters are each prefixed with a
       backslash:  # $ % & _ { }

       @param text: (string) - the string to be escaped.
       @return: (string) - the string in which the LaTeX meta characters
        have been escaped.
    """
    for meta_char in ('#', '$', '%', '&', '_', '{', '}'):
        text = text.replace(meta_char, '\\' + meta_char)
    return text
def escape_latex_template_vars(template_vars, strict=False):
    """Take a dictionary of LaTeX template variables/values and escape
       LaTeX meta characters in some of them, or in all of them when
       called in strict mode.

       Operating in non-strict mode, the rules for escaping are:
         * date(...) and include(...) directive values are never escaped;
         * a value containing an odd number of "$" is escaped;
         * a value containing "{" or "}" is escaped unless the counts of
           "{" and "}" match exactly;
         * a value containing none of $ { } is always escaped.
       A value with balanced $/{ } is assumed to be intentional LaTeX and
       is left untouched.

       @param template_vars: (dictionary) - the LaTeX template variables and
        their values.
       @param strict: (boolean) - when True, every non-directive value is
        escaped regardless of whether it looks like "good" LaTeX.
       @return: (dictionary) - a new dictionary with the values escaped;
        the caller's dictionary is not modified.
    """
    ## Operate on a copy so the caller's dictionary stays untouched:
    escaped_vars = template_vars.copy()
    for varname in list(escaped_vars.keys()):
        varval = escaped_vars[varname]
        ## Leave date(...)/include(...) directives exactly as they are.
        ## (endswith is checked first so an empty value cannot be indexed.)
        if varval.endswith(")") and \
               (varval.startswith("date(") or varval.startswith("include(")):
            continue
        ## Decide whether this value needs escaping (see docstring rules):
        dollar_count = varval.count("$")
        if dollar_count % 2 == 1:
            ## Unbalanced "$" - cannot be valid math-mode LaTeX:
            needs_escaping = True
        elif "{" in varval or "}" in varval:
            ## Braces present: escape only if they are unbalanced:
            needs_escaping = varval.count("{") != varval.count("}")
        else:
            ## No braces; escape unless balanced "$" pairs are present:
            needs_escaping = dollar_count == 0
        ## Strict mode overrides the heuristics above:
        if strict:
            needs_escaping = True
        if needs_escaping:
            escaped_vars[varname] = escape_latex_meta_characters(varval)
    ## Return the "escaped" LaTeX template variables:
    return escaped_vars
def create_pdf_stamp(path_workingdir, latex_template, latex_template_var):
    """Retrieve the LaTeX (and associated) files and use them to create a
       PDF "Stamp" file that can be merged with the main file.
       The PDF stamp is created in a temporary working directory.

       Strategy: pdflatex is run up to twice. The first attempt escapes the
       template variables in non-strict mode (values that look like valid
       LaTeX are kept verbatim); if pdflatex fails, the outputs of the first
       attempt are removed and a second attempt is made with every variable
       value fully escaped.

       @param path_workingdir: (string) the path to the working directory
        that should be used for creating the PDF stamp file.
       @param latex_template: (string) - the name of the latex template
        to be used for the creation of the stamp.
       @param latex_template_var: (dictionary) - key-value pairs of strings
        to be sought and replaced within the latex template.
       @return: (string) - the name of the PDF stamp file.
       @raise InvenioWebSubmitFileStamperError: if cleanup of a failed first
        attempt fails, or if no stamp PDF exists after the final attempt.
    """
    ## Copy the LaTeX template (and its helper image files) into the
    ## working dir so pdflatex can resolve everything locally:
    template_name = copy_template_files_to_stampdir(path_workingdir, \
                                                    latex_template)
    ##
    ####
    ## Make a first attempt at the template PDF creation, escaping the variables
    ## in non-strict mode:
    escaped_latex_template_var = escape_latex_template_vars(latex_template_var)
    ## Now that the latex template and its helper files have been retrieved,
    ## the Stamp PDF can be created.
    final_template = create_final_latex_template(path_workingdir, \
                                                 template_name, \
                                                 escaped_latex_template_var)
    ##
    ## The name that will be given to the PDF stamp file (pdflatex names
    ## its output after the .tex source):
    pdf_stamp_name = "%s.pdf" % os.path.splitext(final_template)[0]
    ## Now, build the Stamp PDF from the LaTeX template:
    cmd_latex = """cd %(workingdir)s; %(path_pdflatex)s """ \
                """-interaction=batchmode """ \
                """%(template-path)s > /dev/null 2>&1""" \
                % { 'template-path' : escape_shell_arg("%s/%s" \
                                          % (path_workingdir, final_template)),
                    'workingdir' : path_workingdir,
                    'path_pdflatex' : CFG_PATH_PDFLATEX,
                  }
    ## Log the latex command into the working dir to help debugging:
    os.system("""echo %s > %s""" % (escape_shell_arg(cmd_latex), \
                                    escape_shell_arg("%s/latex_cmd_first_try" \
                                                     % path_workingdir)))
    ## Run the latex command
    errcode_latex = os.system("%s" % cmd_latex)
    ## Was the PDF stamp file successfully created without error?
    if errcode_latex:
        ## No it wasn't. Perhaps there was a problem with some of the variable
        ## values that we substituted into the template?
        ## To be certain, try to create the PDF one more time - this time
        ## escaping all of the variable values.
        ##
        ## Unlink the PDF file if one was created on the previous attempt:
        if os.access("%s/%s" % (path_workingdir, pdf_stamp_name), os.F_OK):
            try:
                os.unlink("%s/%s" % (path_workingdir, pdf_stamp_name))
            except OSError:
                ## Unable to unlink the PDF file.
                err_msg = "Unable to unlink the PDF stamp file [%s]. " \
                          "Stamping has failed." \
                          % pdf_stamp_name
                register_exception(prefix=err_msg)
                raise InvenioWebSubmitFileStamperError(err_msg)
        ##
        ## Unlink the LaTeX template file that was created with the previously
        ## escaped variables:
        if os.access("%s/%s" % (path_workingdir, final_template), os.F_OK):
            try:
                os.unlink("%s/%s" % (path_workingdir, final_template))
            except OSError:
                ## Unable to unlink the LaTeX file.
                err_msg = "Unable to unlink the LaTeX stamp template file " \
                          "[%s]. Stamping has failed." \
                          % final_template
                register_exception(prefix=err_msg)
                raise InvenioWebSubmitFileStamperError(err_msg)
        ##
        ####
        ## Make another attempt at the template PDF creation, this time escaping
        ## the variables in strict mode (every value is escaped):
        escaped_latex_template_var = \
                escape_latex_template_vars(latex_template_var, strict=True)
        ## Now that the latex template and its helper files have been retrieved,
        ## the Stamp PDF can be created.
        final_template = create_final_latex_template(path_workingdir, \
                                                     template_name, \
                                                     escaped_latex_template_var)
        ##
        ## The name that will be given to the PDF stamp file:
        pdf_stamp_name = "%s.pdf" % os.path.splitext(final_template)[0]
        ## Now, build the Stamp PDF from the LaTeX template:
        cmd_latex = """cd %(workingdir)s; %(path_pdflatex)s """ \
                    """-interaction=batchmode """ \
                    """%(template-path)s > /dev/null 2>&1""" \
                    % { 'template-path' : escape_shell_arg("%s/%s" \
                                          % (path_workingdir, final_template)),
                        'workingdir' : path_workingdir,
                        'path_pdflatex' : CFG_PATH_PDFLATEX,
                      }
        ## Log the latex command
        os.system("""echo %s > %s""" \
                  % (escape_shell_arg(cmd_latex), \
                     escape_shell_arg("%s/latex_cmd_second_try" \
                                      % path_workingdir)))
        ## Run the latex command
        errcode_latex = os.system("%s" % cmd_latex)
    ## Was the PDF stamp file successfully created (on either attempt)?
    if errcode_latex or \
           not os.access("%s/%s" % (path_workingdir, pdf_stamp_name), os.F_OK):
        ## It was not possible to create the PDF stamp file. Fail.
        msg = """Error: Unable to create a PDF stamp file."""
        raise InvenioWebSubmitFileStamperError(msg)
    ## Return the name of the PDF stamp file:
    return pdf_stamp_name
## ***** Functions related to the actual stamping of the file: *****
def apply_stamp_cover_page(path_workingdir, \
                           stamp_file_name, \
                           subject_file, \
                           output_file):
    """Carry out the stamping:
       This function adds a cover-page to the file by concatenating the
       stamp PDF in front of the subject PDF with pdftk.

       @param path_workingdir: (string) - the path to the working directory
        that contains all of the files needed for the stamping process to be
        carried out.
       @param stamp_file_name: (string) - the name of the PDF stamp file
        (i.e. the cover-page itself).
       @param subject_file: (string) - the name of the file to be stamped.
       @param output_file: (string) - the name of the final "stamped" file
        (i.e. that with the cover page added) that will be written in the
        working directory after the function has ended.
       @raise InvenioWebSubmitFileStamperError: if pdftk exits non-zero.
    """
    ## Shell-escaped full paths of the three PDFs involved:
    cover_page_path = escape_shell_arg("%s/%s" % (path_workingdir, \
                                                  stamp_file_name))
    file_to_stamp_path = escape_shell_arg("%s/%s" % (path_workingdir, \
                                                     subject_file))
    stamped_file_path = escape_shell_arg("%s/%s" % (path_workingdir, \
                                                    output_file))
    ## "pdftk COVER SUBJECT cat output RESULT" prepends the cover page:
    cmd_add_cover_page = "%s %s %s cat output %s 2>/dev/null" \
                         % (CFG_PATH_PDFTK, \
                            cover_page_path, \
                            file_to_stamp_path, \
                            stamped_file_path)
    ## Execute the stamping command and check its exit status:
    errcode_add_cover_page = os.system(cmd_add_cover_page)
    if errcode_add_cover_page:
        ## pdftk reported a failure:
        msg = "Error: Unable to stamp file [%s/%s]. There was an error when " \
              "trying to add the cover page [%s/%s] to the file. Stamping " \
              "has failed." \
              % (path_workingdir, \
                 subject_file, \
                 path_workingdir, \
                 stamp_file_name)
        raise InvenioWebSubmitFileStamperError(msg)
def apply_stamp_first_page(path_workingdir, \
                           stamp_file_name, \
                           subject_file, \
                           output_file, \
                           stamp_layer):
    """Carry out the stamping:
       This function adds a stamp to the first page of the file.

       Pipeline: (1) split page 1 of the subject file into its own PDF with
       pdftk; (2) stamp that single page; (3) merge the stamped page 1 with
       pages 2-end of the original. If the merge fails because the original
       only had one page, the stamped single page becomes the final output.

       @param path_workingdir: (string) - the path to the working directory
        that contains all of the files needed for the stamping process to be
        carried out.
       @param stamp_file_name: (string) - the name of the PDF stamp file (i.e.
        the stamp itself).
       @param subject_file: (string) - the name of the file to be stamped.
       @param output_file: (string) - the name of the final "stamped" file that
        will be written in the working directory after the function has ended.
       @param stamp_layer: (string) - the layer to consider when stamping:
        'foreground' maps to pdftk's "stamp" operation, anything else to
        "background".
       @raise InvenioWebSubmitFileStamperError: if any pdftk step fails and
        the single-page fallback does not apply.
    """
    ## Since only the first page of the subject file is to be stamped,
    ## it's safest to separate this into its own temporary file, stamp
    ## it, then re-merge it with the remaining pages of the original
    ## document. In this way, the PDF to be stamped will probably be
    ## simpler (pages with complex figures and tables will probably be
    ## avoided) and the process will hopefully have a smaller chance of
    ## failure.
    ##
    ## First of all, separate the first page of the subject file into a
    ## temporary document:
    ##
    ## Name to be given to the first page of the document:
    output_file_first_page = "p1-%s" % output_file
    ## Name to be given to the first page of the document once it has
    ## been stamped:
    stamped_output_file_first_page = "stamped-%s" % output_file_first_page
    ## Perform the separation ("cat A1" extracts only page 1):
    cmd_get_first_page = \
            "%(pdftk)s A=%(file-to-stamp-path)s " \
            "cat A1 output %(first-page-path)s " \
            "2>/dev/null" \
            % { 'pdftk' : CFG_PATH_PDFTK,
                'file-to-stamp-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, subject_file)),
                'first-page-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         output_file_first_page)),
              }
    errcode_get_first_page = os.system(cmd_get_first_page)
    ## Check that the separation was successful:
    if errcode_get_first_page or \
           not os.access("%s/%s" % (path_workingdir, \
                                    output_file_first_page), os.F_OK):
        ## Separation was unsuccessful. Fail.
        msg = "Error: Unable to stamp file [%s/%s] - it wasn't possible to " \
              "separate the first page from the rest of the document. " \
              "Stamping has failed." \
              % (path_workingdir, subject_file)
        raise InvenioWebSubmitFileStamperError(msg)
    ## Now stamp the first page ("stamp" puts the stamp in the foreground,
    ## "background" behind the page content):
    cmd_stamp_first_page = \
            "%(pdftk)s %(first-page-path)s %(stamp_layer)s " \
            "%(stamp-file-path)s output " \
            "%(stamped-first-page-path)s 2>/dev/null" \
            % { 'pdftk' : CFG_PATH_PDFTK,
                'first-page-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         output_file_first_page)),
                'stamp-file-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         stamp_file_name)),
                'stamped-first-page-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         stamped_output_file_first_page)),
                'stamp_layer' : stamp_layer == 'foreground' and 'stamp' or 'background'
              }
    errcode_stamp_first_page = os.system(cmd_stamp_first_page)
    ## Check that the first page was stamped successfully:
    if errcode_stamp_first_page or \
           not os.access("%s/%s" % (path_workingdir, \
                                    stamped_output_file_first_page), os.F_OK):
        ## Unable to stamp the first page. Fail.
        msg = "Error: Unable to stamp the file [%s/%s] - it was not possible " \
              "to add the stamp to the first page. Stamping has failed." \
              % (path_workingdir, subject_file)
        raise InvenioWebSubmitFileStamperError(msg)
    ## Now that the first page has been stamped successfully, merge it with
    ## the remaining pages of the original file ("cat A1 B2-end"):
    cmd_merge_stamped_and_original_files = \
            "%(pdftk)s A=%(stamped-first-page-path)s " \
            "B=%(original-file-path)s cat A1 B2-end output " \
            "%(stamped-file-path)s 2>/dev/null" \
            % { 'pdftk' : CFG_PATH_PDFTK,
                'stamped-first-page-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         stamped_output_file_first_page)),
                'original-file-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         subject_file)),
                'stamped-file-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         output_file)),
              }
    errcode_merge_stamped_and_original_files = \
            os.system(cmd_merge_stamped_and_original_files)
    ## Check to see whether the command exited with an error:
    if errcode_merge_stamped_and_original_files:
        ## There was an error when trying to merge the stamped first-page
        ## with pages 2 onwards of the original file. One possible
        ## explanation for this could be that the original file only had
        ## one page (in which case trying to reference pages 2-end would
        ## cause an error because they don't exist.
        ##
        ## Try to get the number of pages in the original PDF. If it only
        ## has 1 page, the stamped first page file can become the final
        ## stamped PDF. If it has more than 1 page, there really was an
        ## error when merging the stamped first page with the rest of the
        ## pages and stamping can be considered to have failed.
        cmd_find_number_pages = \
           """%(pdftk)s %(original-file-path)s dump_data | """ \
           """grep NumberOfPages | """ \
           """sed -n 's/^NumberOfPages: \\([0-9]\\{1,\\}\\)$/\\1/p'""" \
           % { 'pdftk' : CFG_PATH_PDFTK,
               'original-file-path' : escape_shell_arg("%s/%s" % \
                                        (path_workingdir, \
                                         subject_file)),
             }
        fh_find_number_pages = os.popen(cmd_find_number_pages, "r")
        match_number_pages = fh_find_number_pages.read()
        ## popen's close() returns None on success, an exit status otherwise:
        errcode_find_number_pages = fh_find_number_pages.close()
        if errcode_find_number_pages is not None:
            ## There was an error while checking for the number of pages.
            ## Fail.
            msg = "Error: Unable to stamp file [%s/%s]. There was an error " \
                  "when attempting to merge the file containing the " \
                  "first page of the stamped file with the remaining " \
                  "pages of the original file and when an attempt was " \
                  "made to count the number of pages in the file, an " \
                  "error was also encountered. Stamping has failed." \
                  % (path_workingdir, subject_file)
            raise InvenioWebSubmitFileStamperError(msg)
        else:
            try:
                number_pages_in_subject_file = int(match_number_pages)
            except ValueError:
                ## Unable to get the number of pages in the original file.
                ## Fail.
                msg = "Error: Unable to stamp file [%s/%s]. There was an " \
                      "error when attempting to merge the file containing the" \
                      " first page of the stamped file with the remaining " \
                      "pages of the original file and when an attempt was " \
                      "made to count the number of pages in the file, an " \
                      "error was also encountered. Stamping has failed." \
                      % (path_workingdir, subject_file)
                raise InvenioWebSubmitFileStamperError(msg)
            else:
                ## Do we have just one page?
                if number_pages_in_subject_file == 1:
                    ## There was only one page in the subject file.
                    ## copy the version that was stamped on the first page to
                    ## the output_file filename:
                    try:
                        shutil.copyfile("%s/%s" \
                                        % (path_workingdir, \
                                           stamped_output_file_first_page), \
                                        "%s/%s" \
                                        % (path_workingdir, output_file))
                    except IOError:
                        ## Unable to copy the file that was stamped on page 1
                        ## Stamping has failed.
                        msg = "Error: It was not possible to copy the " \
                              "temporary file that was stamped on the " \
                              "first page [%s/%s] to the final stamped " \
                              "file [%s/%s]. Stamping has failed." \
                              % (path_workingdir, \
                                 stamped_output_file_first_page, \
                                 path_workingdir, \
                                 output_file)
                        raise InvenioWebSubmitFileStamperError(msg)
                else:
                    ## Despite the fact that there was NOT only one page
                    ## in the original file, there was an error when trying
                    ## to merge it with the file that was stamped on the
                    ## first page. Fail.
                    msg = "Error: Unable to stamp file [%s/%s]. There " \
                          "was an error when attempting to merge the " \
                          "file containing the first page of the " \
                          "stamped file with the remaining pages of the " \
                          "original file. Stamping has failed." \
                          % (path_workingdir, subject_file)
                    raise InvenioWebSubmitFileStamperError(msg)
    elif not os.access("%s/%s" % (path_workingdir, output_file), os.F_OK):
        ## A final version of the stamped file was NOT created even though
        ## no error signal was encountered during the merging process.
        ## Fail.
        msg = "Error: Unable to stamp file [%s/%s]. When attempting to " \
              "merge the file containing the first page of the stamped " \
              "file with the remaining pages of the original file, no " \
              "final file was created. Stamping has failed." \
              % (path_workingdir, subject_file)
        raise InvenioWebSubmitFileStamperError(msg)
def apply_stamp_all_pages(path_workingdir,
                          stamp_file_name,
                          subject_file,
                          output_file,
                          stamp_layer):
    """Carry out the stamping:
    This function adds a stamp to all pages of the file by merging the
    stamp PDF onto every page of the subject file with pdftk.
    @param path_workingdir: (string) - the path to the working directory
     that contains all of the files needed for the stamping process to be
     carried out.
    @param stamp_file_name: (string) - the name of the PDF stamp file (i.e.
     the stamp itself).
    @param subject_file: (string) - the name of the file to be stamped.
    @param output_file: (string) - the name of the final "stamped" file that
     will be written in the working directory after the function has ended.
    @param stamp_layer: (string) - the layer to consider when stamping
    @Exceptions raised: (InvenioWebSubmitFileStamperError) if pdftk fails or
     no output file is produced.
    """
    ## pdftk's "stamp" operation overlays the stamp in the foreground;
    ## "background" places it behind the page content:
    pdftk_operation = "stamp" if stamp_layer == 'foreground' else "background"
    ## Escape the three paths involved before building the shell command:
    subject_path = escape_shell_arg("%s/%s" % (path_workingdir, subject_file))
    stamp_path = escape_shell_arg("%s/%s" % (path_workingdir, stamp_file_name))
    result_path = escape_shell_arg("%s/%s" % (path_workingdir, output_file))
    cmd_stamp_all_pages = "%s %s %s %s output %s 2>/dev/null" \
                          % (CFG_PATH_PDFTK,
                             subject_path,
                             pdftk_operation,
                             stamp_path,
                             result_path)
    errcode_stamp_all_pages = os.system(cmd_stamp_all_pages)
    ## Fail if pdftk reported an error OR the stamped file never appeared:
    if errcode_stamp_all_pages or \
       not os.access("%s/%s" % (path_workingdir, output_file), os.F_OK):
        ## There was a problem stamping the document. Fail.
        msg = "Error: Unable to stamp file [%s/%s]. Stamping has failed." \
              % (path_workingdir, subject_file)
        raise InvenioWebSubmitFileStamperError(msg)
def _filename_with_extension(filename, new_ext, strip_ext=None):
    """Return a version of *filename* that carries the extension *new_ext*.
    This replaces three verbatim copies of the same extension-swapping
    logic that previously lived inline in apply_stamp_to_file.
    @param filename: (string) - the file name to transform.
    @param new_ext: (string) - the extension (with leading dot, e.g. ".pdf")
     that the returned name must carry.
    @param strip_ext: (string) - optional extension (with leading dot) that,
     when found case-insensitively at the end of filename, is stripped
     before new_ext is appended. When None, only the generic "strip any
     extension" behaviour applies.
    @return: (string) - the transformed file name.
    """
    if strip_ext is not None and \
       filename[-len(strip_ext):].lower() == strip_ext:
        ## The name ends with the expected old extension - replace it:
        return "%s%s" % (filename[:-len(strip_ext)], new_ext)
    if len(filename.split(".")) > 1:
        ## The file name has an extension - strip it and add new_ext:
        raw_name = filename[:filename.rfind(".")]
        if raw_name != "":
            return "%s%s" % (raw_name, new_ext)
        ## It would appear that the file had no real extension and that its
        ## name started with a period. Just use the original name with the
        ## new suffix:
        return "%s%s" % (filename, new_ext)
    ## No extension - use the original name with the new suffix:
    return "%s%s" % (filename, new_ext)


def apply_stamp_to_file(path_workingdir,
                        stamp_type,
                        stamp_file_name,
                        subject_file,
                        output_file,
                        stamp_layer):
    """Given a stamp-file, the details of the type of stamp to apply, and the
       details of the file to be stamped, coordinate the process of having
       that stamp applied to the file.
    @param path_workingdir: (string) - the path to the working directory
     that contains all of the files needed for the stamping process to be
     carried out.
    @param stamp_type: (string) - the type of stamp to be applied to the
     file ("coverpage", "first" or "all").
    @param stamp_file_name: (string) - the name of the PDF stamp file (i.e.
     the stamp itself).
    @param subject_file: (string) - the name of the file to be stamped.
    @param output_file: (string) - the name of the final "stamped" file that
     will be written in the working directory after the function has ended.
    @param stamp_layer: (string) - the layer to consider when stamping the file.
    @return: (string) - the name of the stamped file that has been created.
     It will be found in the stamping working directory.
    @Exceptions raised: (InvenioWebSubmitFileStamperError) when the subject
     file cannot be identified as PS/PDF, converted, or stamped.
    """
    ## Stamping is performed on PDF files. We therefore need to test for the
    ## type of the subject file before attempting to stamp it:
    subject_filetype = ""
    ## Using the file command, test for the file-type of "subject_file":
    cmd_gfile = "%(gfile)s %(file-to-stamp-path)s 2> /dev/null" \
                % { 'gfile' : CFG_PATH_GFILE,
                    'file-to-stamp-path' : escape_shell_arg("%s/%s" % \
                                                            (path_workingdir,
                                                             subject_file)),
                  }
    ## Execute the file command and read its output:
    fh_gfile = os.popen(cmd_gfile, "r")
    output_gfile = fh_gfile.read()
    ## Close the pipe and capture its error code:
    errcode_gfile = fh_gfile.close()
    ## If a result was obtained from gfile, scan it for an acceptable
    ## file-type:
    if errcode_gfile is None and output_gfile != "":
        output_gfile = output_gfile.lower()
        if "pdf document" in output_gfile:
            ## This is a PDF file.
            subject_filetype = "pdf"
        elif "postscript" in output_gfile:
            ## This is a PostScript file.
            subject_filetype = "ps"
    if subject_filetype == "":
        ## Unable to determine the file type using gfile.
        ## Try to determine the file type by examining its extension, once
        ## cleaned from the possible "version" suffix (e.g. ';2' in
        ## "foo.pdf;2"):
        tmp_file_extension = subject_file.split(".")[-1]
        tmp_file_extension = tmp_file_extension.split(';')[0]
        if tmp_file_extension.lower() == "pdf":
            subject_filetype = "pdf"
        elif tmp_file_extension.lower() == "ps":
            subject_filetype = "ps"
    if subject_filetype not in ("ps", "pdf"):
        ## unable to process file.
        msg = """Error: Input file [%s] is not PDF or PS. - unable to """ \
              """perform stamping.""" % subject_file
        raise InvenioWebSubmitFileStamperError(msg)
    if subject_filetype == "ps":
        ## Convert the subject file from PostScript to PDF, stripping any
        ## ".ps" extension in favour of ".pdf":
        created_pdfname = _filename_with_extension(subject_file, ".pdf",
                                                   strip_ext=".ps")
        ## Build the distilling command:
        cmd_distill = """%(distiller)s %(ps-file-path)s """ \
                      """%(pdf-file-path)s 2>/dev/null""" % \
                        { 'distiller' : CFG_PATH_PS2PDF,
                          'ps-file-path' : escape_shell_arg("%s/%s" % \
                                                            (path_workingdir,
                                                             subject_file)),
                          'pdf-file-path' : escape_shell_arg("%s/%s" % \
                                                             (path_workingdir,
                                                              created_pdfname)),
                        }
        ## Distill the PS into a PDF:
        errcode_distill = os.system(cmd_distill)
        ## Test to see whether the PS was distilled into a PDF without error:
        if errcode_distill or \
           not os.access("%s/%s" % (path_workingdir, created_pdfname), os.F_OK):
            ## The PDF file was not correctly created in the working
            ## directory. Unable to continue with the stamping process.
            msg = "Error: Unable to correctly convert PostScript file [%s] to" \
                  " PDF. Cannot stamp file." % subject_file
            raise InvenioWebSubmitFileStamperError(msg)
        ## Now assign the name of the created PDF file to subject_file:
        subject_file = created_pdfname
    ## Treat the name of "output_file":
    if output_file in (None, ""):
        ## There is no value for output_file. It should be given the same
        ## name as subject_file, but with "stamped-" prepended.
        ## E.g.: subject_file: test.pdf; output_file: stamped-test.pdf
        output_file = "stamped-%s" % subject_file
    else:
        ## If output_file has an extension, strip it and add a PDF extension:
        output_file = _filename_with_extension(output_file, ".pdf")
    if stamp_type == 'coverpage':
        ## The stamp to be applied to the document is in fact a "cover page".
        ## This means that the entire PDF "stamp" that was created from the
        ## LaTeX template is to be appended to the subject file as the first
        ## page (i.e. a cover-page).
        apply_stamp_cover_page(path_workingdir, \
                               stamp_file_name, \
                               subject_file, \
                               output_file)
    elif stamp_type == "first":
        ## Stamp only the first page of the document:
        apply_stamp_first_page(path_workingdir, \
                               stamp_file_name, \
                               subject_file, \
                               output_file, \
                               stamp_layer)
    elif stamp_type == 'all':
        ## The stamp to be applied to the document is a simple stamp that
        ## should be applied to ALL pages of the document (i.e. merged onto
        ## each page.)
        apply_stamp_all_pages(path_workingdir, \
                              stamp_file_name, \
                              subject_file, \
                              output_file, \
                              stamp_layer)
    else:
        ## Unexpected stamping mode.
        msg = """Error: Unexpected stamping mode [%s]. Stamping has failed.""" \
              % stamp_type
        raise InvenioWebSubmitFileStamperError(msg)
    ## Finally, if the original subject file was a PS, convert the stamped
    ## PDF back to PS:
    if subject_filetype == "ps":
        ## Choose a ".ps" name for the converted file, stripping any ".pdf"
        ## extension from the stamped file's name:
        stamped_psname = _filename_with_extension(output_file, ".ps",
                                                  strip_ext=".pdf")
        ## Build the conversion command:
        cmd_pdf2ps = "%s %s %s 2>/dev/null" % (CFG_PATH_PDF2PS,
                                               escape_shell_arg("%s/%s" % \
                                                                (path_workingdir,
                                                                 output_file)),
                                               escape_shell_arg("%s/%s" % \
                                                                (path_workingdir,
                                                                 stamped_psname)))
        errcode_pdf2ps = os.system(cmd_pdf2ps)
        ## Check to see that the command executed OK; if so, the PS version
        ## becomes the result (otherwise the stamped PDF is returned):
        if not errcode_pdf2ps and \
           os.access("%s/%s" % (path_workingdir, stamped_psname), os.F_OK):
            ## No problem converting the PDF to PS.
            output_file = stamped_psname
    ## Return the name of the "stamped" file:
    return output_file
def copy_subject_file_to_working_directory(path_workingdir, input_file):
    """Attempt to copy the subject file (that which is to be stamped) to the
       current working directory, returning the name of the subject file if
       successful.
    @param path_workingdir: (string) - the path to the working directory
     for the current stamping session.
    @param input_file: (string) - the path to the subject file (that which
     is to be stamped).
    @return: (string) - the name of the subject file, which has been copied
     to the current working directory.
    @Exceptions raised: (InvenioWebSubmitFileStamperError) - upon failure
     to successfully copy the subject file to the working directory.
    """
    ## The basename component of the input path is the subject file's name:
    name_input_file = os.path.split(input_file)[1]
    if name_input_file == "":
        ## The input file is just a path - not a valid filename. Fail.
        msg = """Error: unable to determine the name of the file to be """ \
              """stamped."""
        raise InvenioWebSubmitFileStamperError(msg)
    ## Guard clause: the subject file must exist and be readable before any
    ## copy is attempted:
    if not os.access("%s" % input_file, os.R_OK):
        msg = """Error: Unable to copy stamping file [%s] to """ \
              """working directory [%s]. (File not readable.)""" \
              % (input_file, path_workingdir)
        raise InvenioWebSubmitFileStamperError(msg)
    ## File is readable. Copy it locally to the working directory:
    try:
        shutil.copyfile("%s" % input_file,
                        "%s/%s" % (path_workingdir, name_input_file))
    except IOError:
        ## Unable to copy the stamping subject file to the
        ## working directory. Fail.
        msg = """Error: Unable to copy stamping file [%s] to """ \
              """working directory for stamping [%s].""" \
              % (input_file, path_workingdir)
        raise InvenioWebSubmitFileStamperError(msg)
    ## The subject file was successfully copied to the working directory -
    ## hand back its base name:
    return name_input_file
def create_working_directory():
    """Create a "working directory" in which the files related to the stamping
       process can be stored, and return the full path to it.
    The working directory will be created in ~invenio/var/tmp.
    If it cannot be created there, an exception
    (InvenioWebSubmitFileStamperError) will be raised.
    The working directory will have the prefix
    "websubmit_file_stamper_", and could be given a name something like:
     - websubmit_file_stamper_Tzs3St
    @return: (string) - the full path to the working directory.
    @Exceptions raised: InvenioWebSubmitFileStamperError.
    """
    try:
        ## tempfile.mkdtemp creates the directory and returns its full path:
        return tempfile.mkdtemp(prefix="websubmit_file_stamper_",
                                dir="%s" % CFG_TMPDIR)
    except OSError as err:
        ## Unable to create the temporary directory in ~invenio/var/tmp
        msg = "Error: Unable to create a temporary working directory in " \
              "which to carry out the stamping process. An attempt was made " \
              "to create the directory in [%s]; the error encountered was " \
              "<%s>. Stamping has failed." % (CFG_TMPDIR, str(err))
        raise InvenioWebSubmitFileStamperError(msg)
## ***** Functions Specific to CLI calling of the program: *****
def usage(wmsg="", err_code=0):
    """Print a "usage" message (along with an optional additional warning/error
       message) to stderr and exit with a given error code.
    @param wmsg: (string) - some kind of warning message for the user;
     printed (stripped, with a trailing newline) before the usage text.
    @param err_code: (integer) - an error code to be passed to sys.exit,
     which is called after the usage message has been printed.
    @return: None. (Never returns - always exits via sys.exit.)
    """
    ## Wash the warning message:
    if wmsg != "":
        wmsg = wmsg.strip() + "\n"
    ## The usage message:
    ## (BUGFIX: added the missing space in 'prefixed by "stamped-"'.)
    msg = """ Usage:
  python ~invenio/lib/python/invenio/websubmit_file_stamper.py \\
    [options] input-file.pdf

 websubmit_file_stamper.py is used to add a "stamp" to a PDF file.
 A LaTeX template is used to create the stamp and this stamp is then
 concatenated with the original PDF file.
 The stamp can take the form of either a separate "cover page" that is
 appended to the document; or a "mark" that is applied somewhere either
 on the document's first page or on all of its pages.

 Options:
  -h, --help                Print this help.
  -V, --version             Print version information.
  -v, --verbose=LEVEL       Verbose level (0=min, 1=default, 9=max).
                            [NOT IMPLEMENTED]
  -t, --latex-template=PATH
                            Path to the LaTeX template file that should be used
                            for the creation of the PDF stamp. (Note, if it's
                            just a basename, it will be sought first in the
                            current working directory, and then in the invenio
                            file-stamper templates directory; If there is a
                            qualifying path to the template name, it will be
                            sought only in that location);
  -c, --latex-template-var='VARNAME=VALUE'
                            A variable that should be replaced in the LaTeX
                            template file with its corresponding value. Of the
                            following format:
                                VARNAME=VALUE
                            This option is repeatable - one for each template
                            variable;
  -s, --stamp=STAMP-TYPE
                            The type of stamp to be applied to the subject
                            file. Must be one of 3 values:
                             + "first" - stamp only the first page;
                             + "all" - stamp all pages;
                             + "coverpage" - add a cover page to the
                               document;
                            The default value is "first";
  -l, --layer=LAYER
                            The position of the stamp. Should be one of:
                             + "background" (invisible if original file has
                               a white -not transparent- background layer)
                             + "foreground" (on top of the stamped file.
                               If the stamp does not have a transparent
                               background, will hide all of the document
                               layers)
                            The default value is "background"
  -o, --output-file=XYZ
                            The optional name to be given to the finished
                            (stamped) file IN THE WORKING DIRECTORY
                            (Specify a file name, including
                            extension, not a path). If this is
                            omitted, the stamped file will be given
                            the same name as the input file, but will
                            be prefixed by "stamped-";

 Example:
  python ~invenio/lib/python/invenio/websubmit_file_stamper.py \\
    --latex-template=demo-stamp-left.tex \\
    --latex-template-var='REPORTNUMBER=TEST-THESIS-2008-019' \\
    --latex-template-var='DATE=27/02/2008' \\
    --stamp='first' \\
    --layer='background' \\
    --output-file=testfile_stamped.pdf \\
    testfile.pdf
"""
    sys.stderr.write(wmsg + msg)
    sys.exit(err_code)
def get_cli_options():
    """From the options and arguments supplied by the user via the CLI,
       build a dictionary of options to drive websubmit-file-stamper.
    For reference, the CLI options available to the user are as follows:
       -h, --help                -> Display help/usage message and exit;
       -V, --version             -> Display version information and exit;
       -v, --verbose=            -> Set verbosity level (0=min, 1=default,
                                    9=max);
       -t, --latex-template=     -> Path to the LaTeX template file used for
                                    the creation of the PDF stamp;
       -c, --latex-template-var= -> A "VARNAME=VALUE" pair to be substituted
                                    in the LaTeX template (repeatable);
       -s, --stamp=              -> The type of stamp: "first" (default),
                                    "all" or "coverpage";
       -l, --layer=              -> The stamp position: "background"
                                    (default) or "foreground";
       -o, --output-file=        -> The optional name (a file name, NOT a
                                    path) for the stamped file in the working
                                    directory; when omitted, the input file's
                                    name prefixed by "stamped-" is used.
    @return: (dictionary) of input options and flags. Example:
        { 'latex-template'     : "demo-stamp-left.tex",
          'latex-template-var' : { "REPORTNUMBER" : "TEST-2008-001",
                                   "DATE" : "15/02/2008",
                                 },
          'input-file'         : "test-doc.pdf",
          'output-file'        : "",
          'stamp'              : "first",
          'layer'              : "background",
          'verbosity'          : 0,
        }
    """
    ## dictionary of important values relating to cli call of program:
    options = { 'latex-template' : "",
                'latex-template-var' : {},
                'input-file' : "",
                'output-file' : "",
                'stamp' : "first",
                'layer' : "background",
                'verbosity' : 0,
              }
    ## Get the options and arguments provided by the user via the CLI:
    try:
        myoptions, myargs = getopt.getopt(sys.argv[1:], "hVv:t:c:s:l:o:",
                                          ["help",
                                           "version",
                                           "verbosity=",
                                           "latex-template=",
                                           "latex-template-var=",
                                           "stamp=",
                                           "layer=",
                                           "output-file="])
    except getopt.GetoptError as err:
        ## Invalid option provided - usage message
        usage(wmsg="Error: %(msg)s." % { 'msg' : str(err) })
    ## Get the input file from the arguments list (it should be the
    ## first argument):
    if len(myargs) > 0:
        options["input-file"] = myargs[0]
    ## Extract the details of the options:
    for opt in myoptions:
        if opt[0] in ("-V", "--version"):
            ## version message and exit
            sys.stdout.write("%s\n" % __revision__)
            sys.stdout.flush()
            sys.exit(0)
        elif opt[0] in ("-h", "--help"):
            ## help message and exit
            usage()
        elif opt[0] in ("-v", "--verbosity"):
            ## Get verbosity level (an integer in 0..9; anything else -> 0):
            if not opt[1].isdigit():
                options['verbosity'] = 0
            elif int(opt[1]) not in range(0, 10):
                options['verbosity'] = 0
            else:
                options['verbosity'] = int(opt[1])
        elif opt[0] in ("-o", "--output-file"):
            ## Get the name of the "output file" that is to be created after
            ## stamping (i.e. the "stamped file"):
            options["output-file"] = opt[1]
            if '/' in options["output-file"]:
                ## probably user specified a file path, which is not
                ## supported
                print("Warning: you seem to have specified a path for option '--output-file'.")
                print("Only a file name can be specified. Stamping might fail.")
        elif opt[0] in ("-t", "--latex-template"):
            ## Get the path to the latex template to be used for the creation
            ## of the stamp file:
            options["latex-template"] = opt[1]
        elif opt[0] in ("-s", "--stamp"):
            ## BUGFIX: this branch previously tested for "-m", but the short
            ## option registered with getopt (and documented in usage) is
            ## "-s" - so "-s" was silently ignored. The type of stamp that
            ## is to be applied to the document; one of coverpage/first/all:
            if str(opt[1].lower()) in ("coverpage", "first", "all"):
                ## Valid stamp type, accept it;
                options["stamp"] = str(opt[1]).lower()
            else:
                ## Invalid stamp type. Print usage message and quit.
                usage(wmsg="Chosen stamp type '%s' is not valid" % opt[1])
        elif opt[0] in ("-l", "--layer"):
            ## The layer to consider for the stamp
            if str(opt[1].lower()) in ("background", "foreground"):
                ## Valid layer type, accept it;
                options["layer"] = str(opt[1]).lower()
            else:
                ## Invalid layer type. Print usage message and quit.
                usage(wmsg="Chosen layer type '%s' is not valid" % opt[1])
        elif opt[0] in ("-c", "--latex-template-var"):
            ## This is a variable to be replaced in the LaTeX template.
            ## It should take the form "varname=value"; split on the FIRST
            ## "=" - the left part is the variable name, the right part its
            ## replacement value. A repeated variable name overwrites the
            ## earlier value; a string without "=" is silently ignored.
            varstring = str(opt[1])
            split_varstring = varstring.split("=", 1)
            if len(split_varstring) == 2 and split_varstring[0] != "":
                ## The variable name was not empty - keep it:
                options["latex-template-var"]["%s" % split_varstring[0]] = \
                    "%s" % split_varstring[1]
    ## Return the input options:
    return options
def stamp_file(options):
    """The driver for the stamping process. This is effectively the function
       that is responsible for coordinating the stamping of a file.
    @param options: (dictionary) - the options driving the stamping process.
       Mandatory keys:
        + latex-template: (string) - the path to the LaTeX template to be
           used for the creation of the stamp itself;
        + latex-template-var: (dictionary) - variables to be sought in the
           LaTeX template file, mapped to the values that should be
           substituted in their place. E.g.:
              { "TITLE" : "An Introduction to Invenio" }
        + input-file: (string) - the path to the input file (i.e. that
           which is to be stamped);
        + output-file: (string) - the name of the stamped file that should
           be created by the program IN THE WORKING DIRECTORY (a name, not
           a path). May be empty, in which case a default name is used;
       Optional keys (defaults are applied when missing):
        + stamp: (string) - "first" (default), "all" or "coverpage";
        + layer: (string) - "background" (default) or "foreground";
        + verbosity: (integer) - the verbosity level (default 0);
    @return: (tuple) - consisting of two strings:
       1. the path to the working directory in which all stamping-related
          files are stored;
       2. The name of the "stamped" file;
    @Exceptions raised: (InvenioWebSubmitFileStamperError) exceptions may
     be raised or propagated by this function when the stamping process
     fails for one reason or another.
    """
    ## SANITY CHECKS:
    ## Does the options dictionary contain all mandatory keys?
    mandatory_option_names = ["latex-template",
                              "latex-template-var",
                              "input-file",
                              "output-file"]
    optional_option_names_and_defaults = {"layer": "background",
                                          "verbosity": 0,
                                          "stamp": "first"}
    ## Are we missing some mandatory parameters?
    for mandatory_option_name in mandatory_option_names:
        if mandatory_option_name not in options:
            msg = """Error: Mandatory parameter %s is missing""" % mandatory_option_name
            raise InvenioWebSubmitFileStamperError(msg)
    ## Are we getting some unknown option?
    for received_option_name in options.keys():
        if received_option_name not in mandatory_option_names and \
           received_option_name not in optional_option_names_and_defaults:
            ## Error: the dictionary of options had an illegal structure:
            msg = """Error: Option %s is not a recognized parameter""" % received_option_name
            raise InvenioWebSubmitFileStamperError(msg)
    ## Set default options when not specified.
    ## (.items()/"in" replace the Python-2-only .iteritems()/.has_key(),
    ## matching the Python-3 porting of the surrounding modules.)
    for opt_name, default_value in optional_option_names_and_defaults.items():
        if opt_name not in options:
            options[opt_name] = default_value
    ## Do we have an input file to work on?
    if options["input-file"] in (None, ""):
        ## No input file - stop the stamping:
        msg = "Error: unable to determine the name of the file to be stamped."
        raise InvenioWebSubmitFileStamperError(msg)
    ## Do we have a LaTeX file for creation of the stamp?
    if options["latex-template"] in (None, ""):
        ## No latex stamp file - stop the stamping:
        msg = "Error: unable to determine the name of the LaTeX template " \
              "file to be used for stamp creation."
        raise InvenioWebSubmitFileStamperError(msg)
    ## OK - begin the document stamping process:
    ##
    ## Take just the basename component of the output file (a path is not
    ## supported here):
    (dummy, name_outfile) = os.path.split(options["output-file"])
    if name_outfile != "":
        options["output-file"] = name_outfile
    ## Create a working directory (in which to store the various files used
    ## and created during the stamping process) and get the full path to it:
    path_workingdir = create_working_directory()
    ## Copy the file to be stamped into the working directory:
    basename_input_file = \
            copy_subject_file_to_working_directory(path_workingdir,
                                                   options["input-file"])
    ## Now import the LaTeX (and associated) files into a temporary directory
    ## and use them to create the "stamp" PDF:
    pdf_stamp_name = create_pdf_stamp(path_workingdir,
                                      options["latex-template"],
                                      options["latex-template-var"])
    ## Everything is now ready to merge the "stamping subject" file with the
    ## PDF "stamp" file that has been created:
    name_stamped_file = apply_stamp_to_file(path_workingdir,
                                            options["stamp"],
                                            pdf_stamp_name,
                                            basename_input_file,
                                            options["output-file"],
                                            options["layer"])
    ## Return a tuple containing the working directory and the name of the
    ## stamped file to the caller:
    return (path_workingdir, name_stamped_file)
def stamp_file_cli():
    """The function responsible for triggering the stamping process when
    called via the CLI.
    It reads the CLI options, hands them to stamp_file(), and finally tries
    to copy the stamped file into the current working directory (unless a
    file with the same name already exists there).
    """
    ## Get CLI options and arguments:
    input_options = get_cli_options()
    ## Stamp the file and obtain the working directory in which the stamped
    ## file is situated and the name of the stamped file:
    try:
        (working_dir, stamped_file) = stamp_file(input_options)
    except InvenioWebSubmitFileStamperError as err:
        ## Something went wrong:
        sys.stderr.write("Stamping failed: [%s]\n" % str(err))
        sys.stderr.flush()
        sys.exit(1)
    if os.access("./%s" % stamped_file, os.F_OK):
        ## A file exists in curdir with the same name as the final stamped
        ## file. Don't overwrite it - just print out a message stating this
        ## fact, along with the path to the stamped file in the temporary
        ## working directory:
        msg = "The stamped file [%s] has not been copied to the current " \
              "working directory because a file with this name already " \
              "existed there.\nYou can find the stamped file here: " \
              "[%s/%s].\n" % (stamped_file, working_dir, stamped_file)
        sys.stderr.write(msg)
        sys.stderr.flush()
        return
    ## Copy the stamped file into the current directory:
    try:
        shutil.copyfile("%s/%s" % (working_dir, stamped_file),
                        "./%s" % stamped_file)
    except IOError:
        ## Report that it wasn't possible to copy the stamped file locally
        ## and offer the user a path to it:
        msg = "It was not possible to copy the stamped file to the " \
              "current working directory.\nYou can find it here: " \
              "[%s/%s].\n" \
              % (working_dir, stamped_file)
        sys.stderr.write(msg)
        sys.stderr.flush()
## Start proceedings for CLI calls:
## (Runs only when the module is executed as a script, not on import.)
if __name__ == "__main__":
    stamp_file_cli()
diff --git a/invenio/legacy/websubmit/functions/Create_Modify_Interface.py b/invenio/legacy/websubmit/functions/Create_Modify_Interface.py
index f5397ba48..a5b1c7ad3 100644
--- a/invenio/legacy/websubmit/functions/Create_Modify_Interface.py
+++ b/invenio/legacy/websubmit/functions/Create_Modify_Interface.py
@@ -1,271 +1,271 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This is the Create_Modify_Interface function (along with its helpers).
It is used by WebSubmit for the "Modify Bibliographic Information" action.
"""
__revision__ = "$Id$"
import os
import re
import time
import pprint
from invenio.dbquery import run_sql
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError
from invenio.legacy.websubmit.functions.Retrieve_Data import Get_Field
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
def Create_Modify_Interface_getfieldval_fromfile(cur_dir, fld=""):
"""Read a field's value from its corresponding text file in 'cur_dir' (if it exists) into memory.
Delete the text file after having read-in its value.
This function is called on the reload of the modify-record page. This way, the field in question
can be populated with the value last entered by the user (before reload), instead of always being
populated with the value still found in the DB.
"""
fld_val = ""
if len(fld) > 0 and os.access("%s/%s" % (cur_dir, fld), os.R_OK|os.W_OK):
fp = open( "%s/%s" % (cur_dir, fld), "r" )
fld_val = fp.read()
fp.close()
try:
os.unlink("%s/%s"%(cur_dir, fld))
except OSError:
# Cannot unlink file - ignore, let WebSubmit main handle this
pass
fld_val = fld_val.strip()
return fld_val
def Create_Modify_Interface_getfieldval_fromDBrec(fieldcode, recid):
    """Fetch a field's value from the record stored in the DB.
    This function is called when the Create_Modify_Interface function is
    called for the first time when modifying a given record, and field
    values must be retrieved from the database.
    @param fieldcode: (string) - a comma-separated list of field codes.
    @param recid: (integer) - the identifier of the record to read from.
    @return: (string) - one retrieved value per line, with trailing
        newlines removed; "" when no field code was supplied.
    """
    if fieldcode == "":
        return ""
    codes = (code.strip() for code in fieldcode.split(","))
    collected = "".join("%s\n" % Get_Field(code, recid) for code in codes)
    return collected.rstrip("\n")
def Create_Modify_Interface_transform_date(fld_val):
    """Accept a field's value as a string. If the value is a date in one of
    the following formats:
        DD Mon YYYY (e.g. 23 Apr 2005)
        YYYY-MM-DD (e.g. 2005-04-23)
    ...transform this date value into "DD/MM/YYYY" (e.g. 23/04/2005).
    Any other value -- and any matching value that does not parse as a real
    date -- is returned unchanged.
    """
    ## (recognition regexp, strptime format) pairs for the accepted layouts:
    known_layouts = (
        ("^[0-9]{2} [a-z]{3} [0-9]{4}$", "%d %b %Y"),
        ("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", "%Y-%m-%d"),
    )
    for layout_regexp, layout_format in known_layouts:
        if re.search(layout_regexp, fld_val, re.IGNORECASE) is not None:
            try:
                return time.strftime("%d/%m/%Y",
                                     time.strptime(fld_val, layout_format))
            except (ValueError, TypeError):
                # bad date format: keep the original value
                break
    return fld_val
def Create_Modify_Interface(parameters, curdir, form, user_info=None):
    """
    Create an interface for the modification of a document, based on
    the fields that the user has chosen to modify. This avoids having
    to redefine a submission page for the modifications, but rely on
    the elements already defined for the initial submission i.e. SBI
    action (The only page that needs to be built for the modification
    is the page letting the user specify a document to modify).

    This function should be added at step 1 of your modification
    workflow, after the functions that retrieve report number and
    record id (Get_Report_Number, Get_Recid). Functions at step 2 are
    the ones executed upon successful submission of the form.

    Create_Modify_Interface expects the following parameters:

       * "fieldnameMBI" - the name of a text file in the submission
         working directory that contains a list of the names of the
         WebSubmit fields to include in the Modification interface.
         These field names are separated by "\n" or "+".

    Given the list of WebSubmit fields to be included in the
    modification interface, the values for each field are retrieved
    for the given record (by way of each WebSubmit field being
    configured with a MARC Code in the WebSubmit database). An HTML
    FORM is then created. This form allows a user to modify certain
    field values for a record.

    The file referenced by 'fieldnameMBI' is usually generated from a
    multiple select form field): users can then select one or several
    fields to modify.

    Note that the function will display WebSubmit Response elements,
    but will not be able to set an initial value: this must be done by
    the Response element itself.

    Additionally the function creates an internal field named
    'Create_Modify_Interface_DONE' on the interface, that can be
    retrieved in curdir after the form has been submitted.
    This flag is an indicator for the function that displayed values
    should not be retrieved from the database, but from the submitted
    values (in case the page is reloaded). You can also rely on this
    value when building your WebSubmit Response element in order to
    retrieve value either from the record, or from the submission
    directory.
    """
    ## NOTE(review): several names used below (doctype, action, step,
    ## access, ln, last_step, action_score, Request_Print) are nowhere
    ## defined in this module -- presumably they are globals injected by
    ## the WebSubmit engine before this function runs; confirm against
    ## the websubmit engine before refactoring.
    global sysno,rn
    t = ""
    # variables declaration
    fieldname = parameters['fieldnameMBI']
    # Path of file containing fields to modify
    ## Sandbox namespace handed to Response ("R") elements exec'd below:
    the_globals = {
        'doctype' : doctype,
        'action' : action,
        'act' : action, ## for backward compatibility
        'step' : step,
        'access' : access,
        'ln' : ln,
        'curdir' : curdir,
        'uid' : user_info['uid'],
        'uid_email' : user_info['email'],
        'rn' : rn,
        'last_step' : last_step,
        'action_score' : action_score,
        '__websubmit_in_jail__' : True,
        'form': form,
        'sysno': sysno,
        'user_info' : user_info,
        '__builtins__' : globals()['__builtins__'],
        'Request_Print': Request_Print
    }
    ## Determine the list of fields to modify: either read it from the
    ## 'fieldnameMBI' file in curdir, or fall back to parsing the
    ## <option value=...> entries of the field's own description in the DB.
    if os.path.exists("%s/%s" % (curdir, fieldname)):
        fp = open( "%s/%s" % (curdir, fieldname), "r" )
        fieldstext = fp.read()
        fp.close()
        ## Field names may be separated by "+" or newlines:
        fieldstext = re.sub("\+","\n", fieldstext)
        fields = fieldstext.split("\n")
    else:
        res = run_sql("SELECT fidesc FROM sbmFIELDDESC WHERE name=%s", (fieldname,))
        if len(res) == 1:
            ## Extract the value of every <option value=...> of the select
            ## element, skipping the placeholder "Select" entry:
            fields = res[0][0].replace(" ", "")
            fields = re.findall("<optionvalue=.*>", fields)
            regexp = re.compile("""<optionvalue=(?P<quote>['|"]?)(?P<value>.*?)(?P=quote)""")
            fields = [regexp.search(x) for x in fields]
            fields = [x.group("value") for x in fields if x is not None]
            fields = [x for x in fields if x not in ("Select", "select")]
        else:
            raise InvenioWebSubmitFunctionError("cannot find fields to modify")
    #output some text
    t = t+"<CENTER bgcolor=\"white\">The document <B>%s</B> has been found in the database.</CENTER><br />Please modify the following fields:<br />Then press the 'END' button at the bottom of the page<br />\n" % rn
    ## Render one widget per field, pre-populated with its current value:
    for field in fields:
        subfield = ""
        value = ""
        marccode = ""
        text = ""
        # retrieve and display the modification text
        t = t + "<FONT color=\"darkblue\">\n"
        res = run_sql("SELECT modifytext FROM sbmFIELDDESC WHERE name=%s", (field,))
        if len(res)>0:
            t = t + "<small>%s</small> </FONT>\n" % res[0][0]
        # retrieve the marc code associated with the field
        res = run_sql("SELECT marccode FROM sbmFIELDDESC WHERE name=%s", (field,))
        if len(res) > 0:
            marccode = res[0][0]
        # then retrieve the previous value of the field
        if os.path.exists("%s/%s" % (curdir, "Create_Modify_Interface_DONE")):
            # Page has been reloaded - get field value from text file on server, not from DB record
            value = Create_Modify_Interface_getfieldval_fromfile(curdir, field)
        else:
            # First call to page - get field value from DB record
            value = Create_Modify_Interface_getfieldval_fromDBrec(marccode, sysno)
        # If field is a date value, transform date into format DD/MM/YYYY:
        value = Create_Modify_Interface_transform_date(value)
        res = run_sql("SELECT * FROM sbmFIELDDESC WHERE name=%s", (field,))
        if len(res) > 0:
            ## Positional columns of sbmFIELDDESC:
            element_type = res[0][3]
            numcols = res[0][6]
            numrows = res[0][5]
            size = res[0][4]
            maxlength = res[0][7]
            val = res[0][8]
            fidesc = res[0][9]
            ## Build the HTML widget according to the element type:
            ## T=textarea, F=file, I=input, H=hidden, S=select, D=raw
            ## description, R=Response element (python code exec'd below).
            if element_type == "T":
                text = "<TEXTAREA name=\"%s\" rows=%s cols=%s wrap>%s</TEXTAREA>" % (field, numrows, numcols, value)
            elif element_type == "F":
                text = "<INPUT TYPE=\"file\" name=\"%s\" size=%s maxlength=\"%s\">" % (field, size, maxlength)
            elif element_type == "I":
                value = re.sub("[\n\r\t]+", "", value)
                text = "<INPUT name=\"%s\" size=%s value=\"%s\"> " % (field, size, val)
                text = text + "<SCRIPT>document.forms[0].%s.value=\"%s\";</SCRIPT>" % (field, value)
            elif element_type == "H":
                text = "<INPUT type=\"hidden\" name=\"%s\" value=\"%s\">" % (field, val)
                text = text + "<SCRIPT>document.forms[0].%s.value=\"%s\";</SCRIPT>" % (field, value)
            elif element_type == "S":
                ## Emit javascript that marks the previously-selected
                ## option(s) of the select element as selected:
                values = re.split("[\n\r]+", value)
                text = fidesc
                if re.search("%s\[\]" % field, fidesc):
                    multipletext = "[]"
                else:
                    multipletext = ""
                if len(values) > 0 and not(len(values) == 1 and values[0] == ""):
                    text += "<SCRIPT>\n"
                    text += "var i = 0;\n"
                    text += "el = document.forms[0].elements['%s%s'];\n" % (field, multipletext)
                    text += "max = el.length;\n"
                    for val in values:
                        text += "var found = 0;\n"
                        text += "var i=0;\n"
                        text += "while (i != max) {\n"
                        text += " if (el.options[i].value == \"%s\" || el.options[i].text == \"%s\") {\n" % (val, val)
                        text += " el.options[i].selected = true;\n"
                        text += " found = 1;\n"
                        text += " }\n"
                        text += " i=i+1;\n"
                        text += "}\n"
                        #text += "if (found == 0) {\n"
                        #text += " el[el.length] = new Option(\"%s\", \"%s\", 1,1);\n"
                        #text += "}\n"
                    text += "</SCRIPT>\n"
            elif element_type == "D":
                text = fidesc
            elif element_type == "R":
                try:
                    ## Response element: compile and execute its python code
                    ## in the restricted namespace prepared above; the code
                    ## communicates its output via the 'text' key.
                    co = compile(fidesc.replace("\r\n", "\n"), "<string>", "exec")
                    ## Note this exec is safe WRT global variable because the
                    ## Create_Modify_Interface has already been parsed by
                    ## execfile within a protected environment.
                    the_globals['text'] = ''
                    exec co in the_globals
                    text = the_globals['text']
                except:
                    msg = "Error in evaluating response element %s with globals %s" % (pprint.pformat(field), pprint.pformat(globals()))
                    register_exception(req=None, alert_admin=True, prefix=msg)
                    raise InvenioWebSubmitFunctionError(msg)
            else:
                text = "%s: unknown field type" % field
        t = t + "<small>%s</small>" % text
    # output our flag field
    t += '<input type="hidden" name="Create_Modify_Interface_DONE" value="DONE\n" />'
    # output some more text
    t = t + "<br /><br /><CENTER><small><INPUT type=\"button\" width=400 height=50 name=\"End\" value=\"END\" onClick=\"document.forms[0].step.value = 2;user_must_confirm_before_leaving_page = false;document.forms[0].submit();\"></small></CENTER></H4>"
    return t
diff --git a/invenio/legacy/websubmit/functions/Generate_Group_File.py b/invenio/legacy/websubmit/functions/Generate_Group_File.py
index a0df0e6f6..6d7ad1b26 100644
--- a/invenio/legacy/websubmit/functions/Generate_Group_File.py
+++ b/invenio/legacy/websubmit/functions/Generate_Group_File.py
@@ -1,61 +1,61 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError
CFG_WEBSUBMIT_GROUP_FILE_NAME = "Group"

def Generate_Group_File(parameters, curdir, form, user_info=None):
    """
    Generates a group file (stored in 'curdir/Group') for use with
    publiline.
    @param parameters: (dictionary) - must contain:
        + group_name: (string) - the id of the Group for use in the
          complex approval refereeing workflow.
    @param curdir: (string) - the current submission's working directory.
    @param form: (dictionary) - form fields.
    @param user_info: (dictionary) - various information about the
        submitting user (includes the apache req object).
    @return: (string) - empty string.
    @Exceptions raised: InvenioWebSubmitFunctionError when an unexpected
        error is encountered.
    """
    try:
        ## 'with' guarantees the handle is closed (and flushed) even if
        ## the write fails part-way:
        with open("%s/%s" % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME), "w") \
                as group_file:
            group_file.write(parameters['group_name'])
    except IOError:
        ## Unable to write the Group file to curdir.
        err_msg = "Error: Unable to create Group file [%s/%s]. " \
                  "Perhaps check directory permissions. " \
                  % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME)
        register_exception(prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)
    ## Return an empty string:
    return ""
diff --git a/invenio/legacy/websubmit/functions/Mail_Approval_Request_to_Referee.py b/invenio/legacy/websubmit/functions/Mail_Approval_Request_to_Referee.py
index 9f168bfb0..b01156cae 100644
--- a/invenio/legacy/websubmit/functions/Mail_Approval_Request_to_Referee.py
+++ b/invenio/legacy/websubmit/functions/Mail_Approval_Request_to_Referee.py
@@ -1,409 +1,409 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Mail_Approval_Request_to_Referee: A function to send an email to the referee
of a document informing him/her that a request for its approval has been
submitted by the user.
"""
__revision__ = "$Id$"
import os
import re
import sre_constants
from invenio.websubmit_dblayer import get_approval_request_notes
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError, \
CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN
from invenio.config import CFG_CERN_SITE, \
CFG_SITE_NAME, \
CFG_SITE_URL, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_SITE_RECORD
from invenio.access_control_admin import acc_get_role_users, acc_get_role_id
from invenio.legacy.websubmit.functions.Shared_Functions import ParamFromFile
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.search_engine import print_record
from invenio.ext.email import send_email
CFG_MAIL_BODY = """
A request for the approval of a document in the %(site-name)s has been
made and requires your attention as a referee. The details are as
follows:
Reference Number: [%(report-number)s]
Title: %(title)s
Author(s): %(authors)s
You can see the details of the record at the following address:
<%(site-url)s/%(CFG_SITE_RECORD)s/%(record-id)s>
Please register your decision by following the instructions at the
following address:
<%(site-url)s/submit/direct?%(report-number-fieldname)s=%(report-number)s&sub=%(approval-action)s%(doctype)s&combo%(doctype)s=%(category)s>
Below, you may find some additional information about the approval request:
%(notes)s
"""
def Mail_Approval_Request_to_Referee(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the referee of a document informing
    him/her that a request for its approval has been submitted by the
    user.

    @param categ_file_appreq: (string) - some document types are
       separated into different categories, each of which has its own
       referee(s).
       In such document types, it's necessary to know the document-
       type's category in order to choose the referee.
       This parameter provides a means by which the category information
       can be extracted from a file in the current submission's working
       directory. It should therefore be a filename.

    @param categ_rnseek_appreq: (string) - some document types are
       separated into different categories, each of which has its own
       referee(s).
       In such document types, it's necessary to know the document-
       type's category in order to choose the referee.
       This parameter provides a means by which the category information
       can be extracted from the document's reference number.
       It is in fact a string that will be compiled into a regexp and
       an attempt will be made to match it against the document's
       reference number starting from the left-most position.
       The only pre-requisite is that the segment in which the category is
       sought should be indicated with <CATEGORY>.
       Thus, an example might be as follows:
          ATL(-COM)?-<CATEGORY>-.+
       This would allow "PHYS" in the following reference number to be
       recognised as the category:
          ATL-COM-PHYS-2008-001

    @param edsrn: (string) - the name of the field in which the report
       number should be placed when the referee visits the form for making
       a decision.

    @return: (string) - empty string.
    """
    ## Get the reference number (as global rn - sorry!) and the document type:
    global sysno, rn
    doctype = form['doctype']
    ########
    ## Get the parameters from the list:
    ########
    ## Get the name of the report-number file:
    ########
    try:
        edsrn_file = parameters["edsrn"]
    except KeyError:
        ## No value given for the edsrn file:
        msg = "Error in Mail_Approval_Request_to_Referee function: unable " \
              "to determine the name of the file in which the document's " \
              "report number should be stored."
        raise InvenioWebSubmitFunctionError(msg)
    else:
        edsrn_file = str(edsrn_file)
        edsrn_file = os.path.basename(edsrn_file).strip()
        if edsrn_file == "":
            msg = "Error in Mail_Approval_Request_to_Referee function: " \
                  "unable to determine the name of the file in which " \
                  "the document's report number should be stored."
            raise InvenioWebSubmitFunctionError(msg)
    ########
    ## Get the name of the category file:
    #######
    try:
        ## If it has been provided, get the name of the file in which the
        ## category is stored:
        category_file = parameters["categ_file_appreq"]
    except KeyError:
        ## No value given for the category file:
        category_file = None
    else:
        if category_file is not None:
            category_file = str(category_file)
            category_file = os.path.basename(category_file).strip()
            if category_file == "":
                category_file = None
    ########
    ## Get the regexp that is used to find the category in the report number:
    ########
    try:
        ## If it has been provided, get the regexp used for identifying
        ## a document-type's category from its reference number:
        category_rn_regexp = parameters["categ_rnseek_appreq"]
    except KeyError:
        ## No value given for the category regexp:
        category_rn_regexp = None
    else:
        if category_rn_regexp is not None:
            category_rn_regexp = str(category_rn_regexp).strip()
        if category_rn_regexp == "":
            category_rn_regexp = None
    #######
    ## Resolve the document type's category:
    ##
    ## This is a long process. The end result is that the category is extracted
    ## either from a file in curdir, or from the report number.
    ## If it's taken from the report number, the admin must configure the
    ## function to accept a regular expression that is used to find the
    ## category in the report number.
    ##
    ## NOTE(review): the error messages throughout this section say
    ## "Register_Approval_Request function" -- they appear to be
    ## copy-pasted from that function; confirm before relying on them
    ## when debugging.
    if category_file is not None and category_rn_regexp is not None:
        ## It is not valid to have both a category file and a pattern
        ## describing how to extract the category from a report number.
        ## raise an InvenioWebSubmitFunctionError
        msg = "Error in Register_Approval_Request function: received " \
              "instructions to search for the document's category in " \
              "both its report number AND in a category file. Could " \
              "not determine which to use - please notify the " \
              "administrator."
        raise InvenioWebSubmitFunctionError(msg)
    elif category_file is not None:
        ## Attempt to recover the category information from a file in the
        ## current submission's working directory:
        category = ParamFromFile("%s/%s" % (curdir, category_file))
        if category is not None:
            category = category.strip()
        if category in (None, ""):
            ## The category cannot be resolved.
            msg = "Error in Register_Approval_Request function: received " \
                  "instructions to search for the document's category in " \
                  "a category file, but could not recover the category " \
                  "from that file. An approval request therefore cannot " \
                  "be registered for the document."
            raise InvenioWebSubmitFunctionError(msg)
    elif category_rn_regexp is not None:
        ## Attempt to recover the category information from the document's
        ## reference number using the regexp in category_rn_regexp:
        ##
        ## Does the category regexp contain the key-phrase "<CATEG>"?
        if category_rn_regexp.find("<CATEG>") != -1:
            ## Yes. Replace "<CATEG>" with "(?P<category>.+?)".
            ## For example, this:
            ##    ATL(-COM)?-<CATEG>-
            ## Will be transformed into this:
            ##    ATL(-COM)?-(?P<category>.+?)-
            category_rn_final_regexp = \
                category_rn_regexp.replace("<CATEG>", r"(?P<category>.+?)", 1)
        else:
            ## The regexp for category didn't contain "<CATEG>", but this is
            ## mandatory.
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression (no marker for " \
                  "the category was present.) Since the document's category " \
                  "therefore cannot be retrieved, an approval request cannot " \
                  "be registered for it. Please report this problem to the " \
                  "administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        ##
        try:
            ## Attempt to compile the regexp for finding the category:
            re_categ_from_rn = re.compile(category_rn_final_regexp)
        except sre_constants.error:
            ## The expression passed to this function could not be compiled
            ## into a regexp. Register this exception and raise an
            ## InvenioWebSubmitFunctionError:
            exception_prefix = "Error in Register_Approval_Request function: " \
                               "The [%(doctype)s] submission has been " \
                               "configured to search for the document type's " \
                               "category in its reference number, using the " \
                               "following regexp: /%(regexp)s/. This regexp, " \
                               "however, could not be compiled correctly " \
                               "(created it from %(categ-search-term)s.)" \
                               % { 'doctype' : doctype, \
                                   'regexp' : category_rn_final_regexp, \
                                   'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression. Since the " \
                  "document's category therefore cannot be retrieved, an " \
                  "approval request cannot be registered for it. Please " \
                  "report this problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        else:
            ## Now attempt to recover the category from the RN string:
            m_categ_from_rn = re_categ_from_rn.match(rn)
            if m_categ_from_rn is not None:
                ## The pattern matched in the string.
                ## Extract the category from the match:
                try:
                    category = m_categ_from_rn.group("category")
                except IndexError:
                    ## There was no "category" group. That group is mandatory.
                    exception_prefix = \
                       "Error in Register_Approval_Request function: The " \
                       "[%(doctype)s] submission has been configured to " \
                       "search for the document type's category in its " \
                       "reference number using the following regexp: " \
                       "/%(regexp)s/. The search produced a match, but " \
                       "there was no \"category\" group in the match " \
                       "object although this group is mandatory. The " \
                       "regexp was compiled from the following string: " \
                       "[%(categ-search-term)s]." \
                       % { 'doctype' : doctype, \
                           'regexp' : category_rn_final_regexp, \
                           'categ-search-term' : category_rn_regexp, }
                    register_exception(prefix=exception_prefix)
                    msg = "Error in Register_Approval_Request function: The " \
                          "[%(doctype)s] submission has been configured to " \
                          "search for the document type's category in its " \
                          "reference number, using a poorly formed search " \
                          "expression (there was no category marker). Since " \
                          "the document's category therefore cannot be " \
                          "retrieved, an approval request cannot be " \
                          "registered for it. Please report this problem to " \
                          "the administrator." \
                          % { 'doctype' : doctype, }
                    raise InvenioWebSubmitFunctionError(msg)
                else:
                    category = category.strip()
                    if category == "":
                        msg = "Error in Register_Approval_Request function: " \
                              "The [%(doctype)s] submission has been " \
                              "configured to search for the document type's " \
                              "category in its reference number, but no " \
                              "category was found. The request for approval " \
                              "cannot be registered. Please report this " \
                              "problem to the administrator." \
                              % { 'doctype' : doctype, }
                        raise InvenioWebSubmitFunctionError(msg)
            else:
                ## No match. Cannot find the category and therefore cannot
                ## continue:
                msg = "Error in Register_Approval_Request function: The " \
                      "[%(doctype)s] submission has been configured to " \
                      "search for the document type's category in its " \
                      "reference number, but no match was made. The request " \
                      "for approval cannot be registered. Please report " \
                      "this problem to the administrator." \
                      % { 'doctype' : doctype, }
                raise InvenioWebSubmitFunctionError(msg)
    else:
        ## The document type has no category.
        category = ""
    ##
    ## End of category recovery
    #######
    #######
    ## Get the title and author(s) from the record:
    #######
    ## Author(s):
    rec_authors = ""
    rec_first_author = print_record(int(sysno), 'tm', "100__a")
    rec_other_authors = print_record(int(sysno), 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += "".join(["%s\n" % author.strip() for \
                                author in rec_first_author.split("\n")])
    if rec_other_authors != "":
        rec_authors += "".join(["%s\n" % author.strip() for \
                                author in rec_other_authors.split("\n")])
    ## Title:
    rec_title = "".join(["%s\n" % title.strip() for title in \
                         print_record(int(sysno), 'tm', "245__a").split("\n")])
    ##
    #######
    ## the normal approval action
    approve_act = 'APP'
    ## Get notes about the approval request:
    approval_notes = get_approval_request_notes(doctype, rn)
    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system in CERN now works with listbox membership.
        ## List names should take the format
        ## "service-cds-referee-doctype-category@cern.ch"
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        if doctype == 'ATN': ## Special case of 'RPR' action for doctype ATN
            action = ParamFromFile("%s/%s" % (curdir,'act')).strip()
            if action == 'RPR':
                notetype = ParamFromFile("%s/%s" % (curdir,'ATN_NOTETYPE')).strip()
                if notetype not in ('SLIDE','PROC'):
                    raise InvenioWebSubmitFunctionError('ERROR function Mail_Approval_Request_to_Referee:: do not recognize notetype ' + notetype)
                if notetype == 'PROC':
                    approve_act = 'APR' # RPR PROC requires APR action to approve
                    referee_listname = "service-cds-referee-atn-proc@cern.ch"
                elif notetype == 'SLIDE': ## SLIDES approval
                    approve_act = 'APS' # RPR SLIDE requires APS action to approve
                    referee_listname = "atlas-speakers-comm@cern.ch"
                else:
                    raise InvenioWebSubmitFunctionError('ERROR function Mail_Approval_Request_to_Referee:: do not understand notetype: ' +notetype)
        else:
            referee_listname = "service-cds-referee-%s" % doctype.lower()
            if category != "":
                referee_listname += "-%s" % category.lower()
        ## NOTE(review): in the ATN branch above referee_listname already
        ## ends with "@cern.ch", so the concatenation below looks like it
        ## would double the suffix (or, if this line belongs inside the
        ## else-branch, leave mailto_addresses unset for ATN) -- confirm
        ## against the original file's indentation.
        mailto_addresses = referee_listname + "@cern.ch"
        if category == 'CDSTEST':
            referee_listname = "service-cds-referee-%s" % doctype.lower()
            referee_listname += "-%s" % category.lower()
            mailto_addresses = referee_listname + "@cern.ch"
    else:
        referee_address = ""
        ## Try to retrieve the referee's email from the referee's database:
        for user in \
            acc_get_role_users(acc_get_role_id("referee_%s_%s" \
                                               % (doctype, category))):
            referee_address += user[1] + ","
        ## And if there are general referees:
        for user in \
            acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
            referee_address += user[1] + ","
        referee_address = re.sub(",$", "", referee_address)
        # Creation of the mail for the referee
        mailto_addresses = ""
        if referee_address != "":
            ## NOTE(review): this leaves a trailing comma on the recipient
            ## list; presumably send_email tolerates it -- confirm. The
            ## else-branch strips a comma from the just-initialised empty
            ## string, i.e. it is effectively a no-op.
            mailto_addresses = referee_address + ","
        else:
            mailto_addresses = re.sub(",$", "", mailto_addresses)
    ##
    ## Send the email:
    mail_subj = "Request for approval of [%s]" % rn
    mail_body = CFG_MAIL_BODY % \
                { 'site-name' : CFG_SITE_NAME,
                  'CFG_SITE_RECORD' : CFG_SITE_RECORD,
                  'report-number-fieldname' : edsrn_file,
                  'report-number' : rn,
                  'title' : rec_title,
                  'authors' : rec_authors,
                  'site-url' : CFG_SITE_URL,
                  'record-id' : sysno,
                  'approval-action' : approve_act,
                  'doctype' : doctype,
                  'notes' : approval_notes,
                  'category' : category,
                }
    send_email(CFG_SITE_SUPPORT_EMAIL,
               mailto_addresses,
               mail_subj,
               mail_body,
               copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    ##
    return ""
diff --git a/invenio/legacy/websubmit/functions/Mail_Approval_Withdrawn_to_Referee.py b/invenio/legacy/websubmit/functions/Mail_Approval_Withdrawn_to_Referee.py
index 13b88b2fb..627ed4558 100644
--- a/invenio/legacy/websubmit/functions/Mail_Approval_Withdrawn_to_Referee.py
+++ b/invenio/legacy/websubmit/functions/Mail_Approval_Withdrawn_to_Referee.py
@@ -1,345 +1,345 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Mail_Approval_Withdrawn_to_Referee: A function to send an email to the
referee of a document informing him/her that the request for its approval
has been withdrawn.
"""
__revision__ = "$Id$"
import os
import re
import sre_constants
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionWarning, \
CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN
from invenio.config import CFG_CERN_SITE, \
CFG_SITE_NAME, \
CFG_SITE_SUPPORT_EMAIL
from invenio.access_control_admin import acc_get_role_users, acc_get_role_id
from invenio.legacy.websubmit.functions.Shared_Functions import ParamFromFile
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.websubmit_dblayer import get_approval_request_notes
from invenio.ext.email import send_email
CFG_MAIL_BODY = """
The request for approval of the document [%(report-number)s] in
%(site-name)s has been withdrawn and no longer
requires your attention as referee.
"""
def Mail_Approval_Withdrawn_to_Referee(parameters, \
curdir, \
form, \
user_info=None):
"""
This function sends an email to the referee of a document informing
him/her that the request for its approval has been withdrawn.
@param categ_file_withd: (string) - some document types are
separated into different categories, each of which has its own
referee(s).
In such document types, it's necessary to know the document-
type's category in order to choose the referee.
This parameter provides a means by which the category information
can be extracted from a file in the current submission's working
directory. It should therefore be a filename.
@param categ_rnseek_withd: (string) - some document types are
separated into different categories, each of which has its own
referee(s).
In such document types, it's necessary to know the document-
type's category in order to choose the referee.
This parameter provides a means by which the category information
can be extracted from the document's reference number.
It is infact a string that will be compiled into a regexp and
an attempt will be made to match it agains the document's reference
number starting from the left-most position.
The only pre-requisite is that the segment in which the category is
sought should be indicated with <CATEGORY>.
Thus, an example might be as follows:
ATL(-COM)?-<CATEGORY>-.+
This would allow "PHYS" in the following reference number to be
recognised as the category:
ATL-COM-PHYS-2008-001
@return: (string) - empty string.
"""
## Get the reference number (as global rn - sorry!) and the document type:
## NOTE: sysno and rn are injected into this namespace by the WebSubmit
## functions sandbox; they are not module-level names.
global sysno, rn
doctype = form['doctype']
########
## Get the parameters from the list:
########
## Get the name of the category file:
#######
try:
## If it has been provided, get the name of the file in which the
## category is stored:
category_file = parameters["categ_file_withd"]
except KeyError:
## No value given for the category file:
category_file = None
else:
if category_file is not None:
category_file = str(category_file)
## basename() guards against path components sneaking into curdir:
category_file = os.path.basename(category_file).strip()
if category_file == "":
category_file = None
########
## Get the regexp that is used to find the category in the report number:
########
try:
## If it has been provided, get the regexp used for identifying
## a document-type's category from its reference number:
category_rn_regexp = parameters["categ_rnseek_withd"]
except KeyError:
## No value given for the category regexp:
category_rn_regexp = None
else:
if category_rn_regexp is not None:
category_rn_regexp = str(category_rn_regexp).strip()
if category_rn_regexp == "":
category_rn_regexp = None
#######
## Resolve the document type's category:
##
## This is a long process. The end result is that the category is extracted
## either from a file in curdir, or from the report number.
## If it's taken from the report number, the admin must configure the
## function to accept a regular expression that is used to find the
## category in the report number.
##
if category_file is not None and category_rn_regexp is not None:
## It is not valid to have both a category file and a pattern
## describing how to extract the category from a report number.
## raise an InvenioWebSubmitFunctionWarning:
msg = "Error in Mail_Approval_Withdrawn_to_Referee function: " \
"received instructions to search for the document's category " \
"in both its report number AND in a category file. Could " \
"not determine which to use - please notify the " \
"administrator."
raise InvenioWebSubmitFunctionWarning(msg)
elif category_file is not None:
## Attempt to recover the category information from a file in the
## current submission's working directory:
category = ParamFromFile("%s/%s" % (curdir, category_file))
if category is not None:
category = category.strip()
if category in (None, ""):
## The category cannot be resolved.
msg = "Error in Mail_Approval_Withdrawn_to_Referee function: " \
"received instructions to search for the document's " \
"category in a category file, but could not recover the " \
"category from that file. The referee cannot be notified " \
"of the approval request withdrawal by mail."
raise InvenioWebSubmitFunctionWarning(msg)
elif category_rn_regexp is not None:
## Attempt to recover the category information from the document's
## reference number using the regexp in category_rn_regexp:
##
## Does the category regexp contain the key-phrase "<CATEG>"?
## NOTE(review): the docstring advertises the marker as "<CATEGORY>"
## but the code looks for "<CATEG>" -- confirm which spelling the
## admin documentation uses.
if category_rn_regexp.find("<CATEG>") != -1:
## Yes. Replace "<CATEG>" with "(?P<category>.+?)".
## For example, this:
## ATL(-COM)?-<CATEG>-
## Will be transformed into this:
## ATL(-COM)?-(?P<category>.+?)-
category_rn_final_regexp = \
category_rn_regexp.replace("<CATEG>", r"(?P<category>.+?)", 1)
else:
## The regexp for category didn't contain "<CATEG>", but this is
## mandatory.
msg = "Error in Mail_Approval_Withdrawn_to_Referee function: The" \
" [%(doctype)s] submission has been configured to search " \
"for the document type's category in its reference number," \
" using a poorly formed search expression (no marker for " \
"the category was present.) Since the document's category " \
"cannot be retrieved, the referee cannot be " \
"notified of the approval request withdrawal by mail." \
% { 'doctype' : doctype, }
raise InvenioWebSubmitFunctionWarning(msg)
##
try:
## Attempt to compile the regexp for finding the category:
re_categ_from_rn = re.compile(category_rn_final_regexp)
except sre_constants.error:
## The expression passed to this function could not be compiled
## into a regexp. Register this exception and raise an
## InvenioWebSubmitFunctionWarning:
exception_prefix = "Error in Mail_Approval_Withdrawn_to_Referee " \
"function: The [%(doctype)s] submission has " \
"been configured to search for the document " \
"type's category in its reference number, " \
"using the following regexp: /%(regexp)s/. " \
"This regexp, however, could not be " \
"compiled correctly (created it from " \
"%(categ-search-term)s.)" \
% { 'doctype' : doctype, \
'regexp' : category_rn_final_regexp, \
'categ-search-term' : category_rn_regexp, }
register_exception(prefix=exception_prefix)
## NOTE(review): the next message is missing a space between
## "referee" and "cannot" -- once the adjacent string literals are
## concatenated it reads "refereecannot".
msg = "Error in Mail_Approval_Withdrawn_to_Referee function: The" \
" [%(doctype)s] submission has been configured to search " \
"for the document type's category in its reference number," \
" using a poorly formed search expression. Since the " \
"document's category cannot be retrieved, the referee" \
"cannot be notified of the approval request withdrawal by " \
"mail." \
% { 'doctype' : doctype, }
raise InvenioWebSubmitFunctionWarning(msg)
else:
## Now attempt to recover the category from the RN string:
m_categ_from_rn = re_categ_from_rn.match(rn)
if m_categ_from_rn is not None:
## The pattern matched in the string.
## Extract the category from the match:
try:
category = m_categ_from_rn.group("category")
except IndexError:
## There was no "category" group. That group is mandatory.
exception_prefix = \
"Error in Mail_Approval_Withdrawn_to_Referee " \
"function: The [%(doctype)s] submission has been " \
"configured to search for the document type's " \
"category in its reference number using the " \
"following regexp: " \
"/%(regexp)s/. The search produced a match, but " \
"there was no \"category\" group in the match " \
"object although this group is mandatory. The " \
"regexp was compiled from the following string: " \
"[%(categ-search-term)s]." \
% { 'doctype' : doctype, \
'regexp' : category_rn_final_regexp, \
'categ-search-term' : category_rn_regexp, }
register_exception(prefix=exception_prefix)
msg = "Error in Mail_Approval_Withdrawn_to_Referee " \
"function: The [%(doctype)s] submission has been " \
"configured to search for the document type's " \
"category in its reference number, using a poorly " \
"formed search expression (there was no category " \
"marker). Since the document's category therefore " \
"cannot be retrieved, the referee cannot be " \
"notified of the approval request withdrawal " \
"by mail." \
% { 'doctype' : doctype, }
raise InvenioWebSubmitFunctionWarning(msg)
else:
category = category.strip()
if category == "":
msg = "Error in Mail_Approval_Withdrawn_to_Referee " \
"function: The [%(doctype)s] submission has " \
"been configured to search for the document " \
"type's category in its reference number, but " \
"no category was found. The referee cannot be " \
"notified of the approval request withdrawal " \
"by mail." \
% { 'doctype' : doctype, }
raise InvenioWebSubmitFunctionWarning(msg)
else:
## No match. Cannot find the category and therefore cannot
## continue:
msg = "Error in Mail_Approval_Withdrawn_to_Referee function:" \
" The [%(doctype)s] submission has been configured to " \
"search for the document type's category in its " \
"reference number, but no match was made. The referee " \
"cannot be notified of the approval request " \
"withdrawal by mail." \
% { 'doctype' : doctype, }
raise InvenioWebSubmitFunctionWarning(msg)
else:
## The document type has no category.
category = ""
##
## End of category recovery
#######
## Get the referee email address:
if CFG_CERN_SITE:
## The referees system in CERN now works with listbox membership.
## List names should take the format
## "service-cds-referee-doctype-category@cern.ch"
## Make sure that your list exists!
## FIXME - to be replaced by a mailing alias in webaccess in the
## future.
## see if was a PROC request or not
notes = get_approval_request_notes(doctype,rn)
was_proc = 'n'
was_slide = 'n'
if notes:
note_lines = notes.split('\n')
for note_line in note_lines:
if note_line.find('Requested Note Classification:') == 0:
note_type = note_line.split()[-1]
if note_type == 'PROC':
was_proc = 'y'
elif note_type == 'SLIDE':
was_slide = 'y'
break # there may be more than one - just take the first
if was_proc == 'y':
referee_listname = "service-cds-referee-%s" % doctype.lower()
referee_listname += "-%s" % 'proc'
elif was_slide == 'y':
referee_listname = "atlas-speakers-comm"
else:
referee_listname = "service-cds-referee-%s" % doctype.lower()
if category != "":
referee_listname += "-%s" % category.lower()
referee_listname += "@cern.ch"
mailto_addresses = referee_listname
## NOTE(review): for the special testing category the listname built
## above (including any PROC/SLIDE handling) is discarded and rebuilt
## from scratch:
if category == 'CDSTEST': ## our special testing category
referee_listname = "service-cds-referee-%s" % doctype.lower()
referee_listname += "-%s" % category.lower()
mailto_addresses = referee_listname + "@cern.ch"
else:
referee_address = ""
## Try to retrieve the referee's email from the referee's database:
for user in \
acc_get_role_users(acc_get_role_id("referee_%s_%s" \
% (doctype, category))):
referee_address += user[1] + ","
## And if there are general referees:
for user in \
acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
referee_address += user[1] + ","
referee_address = re.sub(",$", "", referee_address)
# Creation of the mail for the referee
## NOTE(review): this comma handling looks inverted: a non-empty
## referee_address (already stripped of its trailing comma above) gets
## a trailing comma APPENDED, while the strip in the else-branch
## operates on the empty string and is a no-op. Presumably send_email
## tolerates the trailing comma -- confirm before changing.
mailto_addresses = ""
if referee_address != "":
mailto_addresses = referee_address + ","
else:
mailto_addresses = re.sub(",$", "", mailto_addresses)
##
## Send the email:
mail_subj = "Request for approval of [%s] withdrawn" % rn
mail_body = CFG_MAIL_BODY % \
{ 'site-name' : CFG_SITE_NAME,
'report-number' : rn,
}
send_email(CFG_SITE_SUPPORT_EMAIL,
mailto_addresses,
mail_subj,
mail_body,
copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
##
return ""
diff --git a/invenio/legacy/websubmit/functions/Make_Dummy_MARC_XML_Record.py b/invenio/legacy/websubmit/functions/Make_Dummy_MARC_XML_Record.py
index 63562c25f..186ee42c8 100644
--- a/invenio/legacy/websubmit/functions/Make_Dummy_MARC_XML_Record.py
+++ b/invenio/legacy/websubmit/functions/Make_Dummy_MARC_XML_Record.py
@@ -1,156 +1,156 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Make a dummy MARC XML record and store it in the submission's working-
directory.
"""
__revision__ = "$Id$"
import os
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.utils.text import wash_for_xml
from invenio.config import \
CFG_BINDIR, \
CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError
CFG_WEBSUBMIT_DUMMY_XML_NAME = "dummy_marcxml_rec"
def Make_Dummy_MARC_XML_Record(parameters, curdir, form, user_info=None):
"""
Make a dummy MARC XML record and store it in a submission's working-
directory.
This dummy record is not intended to be inserted into the Invenio
repository. Rather, it is intended as a way for other submission-
related functionalities to have access to the data submitted without
necessarily having to know the names of the files in which the
values were stored.
An example could be the publiline service: by using a dummy record
in the submission's directory in would be able to access an item's
information (e.g. title, etc) without having to know the name of the
title file, etc.
Another use for the dummy record could be, for example, creating a
preview of the submitted record information with bibconvert.
@param parameters: (dictionary) - must contain:
+ dummyrec_source_tpl: (string) - the name of the bibconvert
source template used for the creation of the dummy record.
+ dummyrec_create_tpl: (string) - the name of the bibconvert
create template used for the creation of the dummy record.
@param curdir: (string) - the current submission's working
directory.
@param form: (dictionary) - form fields.
@param user_info: (dictionary) - various information about the
submitting user (includes the
apache req object).
@return: (string) - empty string.
@Exceptions raised: InvenioWebSubmitFunctionError when an
unexpected error is encountered.
"""
## Get the apache request object from user_info: (we may use it for
## error reporting)
try:
req_obj = user_info['req']
except (KeyError, TypeError):
req_obj = None
## Strip whitespace from the names of the source and creation templates:
source_tpl = parameters['dummyrec_source_tpl'].replace(" ","")
create_tpl = parameters['dummyrec_create_tpl'].replace(" ","")
## Call bibconvert to create the MARC XML record:
## NOTE(review): curdir and the template names are interpolated into a
## shell command without any escaping. This is safe only so long as
## both are admin-controlled values (shell-injection risk otherwise).
cmd_bibconvert_call = "%s/bibconvert -l1 -d'%s' -Cs'%s/%s' -Ct'%s/%s' " \
"> %s/%s 2>/dev/null" \
% (CFG_BINDIR, \
curdir, \
CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, \
source_tpl, \
CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR, \
create_tpl, \
curdir, \
CFG_WEBSUBMIT_DUMMY_XML_NAME)
errcode_bibconvert = os.system(cmd_bibconvert_call)
if errcode_bibconvert:
## There was a problem creating the dummy MARC XML record. Fail.
err_msg = "Error: Unable to create dummy MARC XML record [%s/%s]. " \
"Bibconvert failed with error code [%s]." \
% (curdir, \
CFG_WEBSUBMIT_DUMMY_XML_NAME, \
errcode_bibconvert)
raise InvenioWebSubmitFunctionError(err_msg)
## Bibconvert doesn't escape stuff for XML. Read the dummy record into
## memory, replace any "&" or "<" with "&amp;" and "&lt;", then re-write
## the dummy MARC XML record to the current dir:
try:
fp_dummyrec = open("%s/%s" % (curdir, \
CFG_WEBSUBMIT_DUMMY_XML_NAME), "r")
record_text = fp_dummyrec.read()
fp_dummyrec.close()
except IOError:
## Couldn't read the contents of dummy_marcxml_rec.
err_msg = "Error: Unable to create dummy MARC XML record [%s/%s]. " \
"Bibconvert reported no error, but the record was " \
"unreadable later." % (curdir, CFG_WEBSUBMIT_DUMMY_XML_NAME)
register_exception(req=req_obj, prefix=err_msg)
raise InvenioWebSubmitFunctionError(err_msg)
# Escape XML-reserved chars and clean the unsupported ones (mainly
# control characters)
record_text = wash_for_xml(record_text)
## Replace the "&":
## First collapse any already-escaped "&amp;" back to "&" so that the
## following replace does not double-escape it:
record_text = record_text.replace("&amp;","&")
record_text = record_text.replace("&","&amp;")
## Now replace the "<":
record_text = record_text.replace("<","&lt;")
## Having replaced "<" everywhere in the record, put it back in known
## MARC XML tags:
## (only the opening "<" of each tag needs restoring; the closing ">"
## was never escaped)
record_text = record_text.replace("&lt;record","<record")
record_text = record_text.replace("&lt;/record","</record")
record_text = record_text.replace("&lt;datafield","<datafield")
record_text = record_text.replace("&lt;/datafield","</datafield")
record_text = record_text.replace("&lt;controlfield","<controlfield")
record_text = record_text.replace("&lt;/controlfield","</controlfield")
record_text = record_text.replace("&lt;subfield","<subfield")
record_text = record_text.replace("&lt;/subfield","</subfield")
## Finally, re-write the dummy MARC XML record to the current submission's
## working directory:
try:
fp_dummyrec = open("%s/%s" % (curdir, \
CFG_WEBSUBMIT_DUMMY_XML_NAME), "w")
fp_dummyrec.write(record_text)
fp_dummyrec.flush()
fp_dummyrec.close()
except IOError, err:
## Unable to write the dummy MARC XML record to curdir.
err_msg = "Error: Unable to create dummy MARC XML record [%s/%s]. " \
"After having escaped its data contents for XML, it could " \
"not be written back to the submission's working directory." \
% (curdir, CFG_WEBSUBMIT_DUMMY_XML_NAME)
register_exception(req=req_obj, prefix=err_msg)
raise InvenioWebSubmitFunctionError(err_msg)
## Return an empty string:
return ""
diff --git a/invenio/legacy/websubmit/functions/Move_Files_to_Storage.py b/invenio/legacy/websubmit/functions/Move_Files_to_Storage.py
index 4e362e522..82a439bac 100644
--- a/invenio/legacy/websubmit/functions/Move_Files_to_Storage.py
+++ b/invenio/legacy/websubmit/functions/Move_Files_to_Storage.py
@@ -1,270 +1,270 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Function for archiving files"""
__revision__ = "$Id$"
from invenio.bibdocfile import \
BibRecDocs, \
decompose_file, \
InvenioBibDocFileError, \
CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT
import os
import re
from invenio.websubmit_icon_creator import create_icon
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionWarning
from invenio.legacy.websubmit.functions.Shared_Functions import get_dictionary_from_string, \
createRelatedFormats
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_BINDIR
from invenio.dbquery import run_sql
from invenio.shellutils import run_shell_command
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
"""
The function moves files received from the standard submission's
form through file input element(s). The document are assigned a
'doctype' (or category) corresponding to the file input element
(eg. a file uploaded throught 'DEMOPIC_FILE' will go to
'DEMOPIC_FILE' doctype/category).
Websubmit engine builds the following file organization in the
directory curdir/files:
curdir/files
|
_____________________________________________________________________
| | |
./file input 1 element's name ./file input 2 element's name ....
(for eg. 'DEMOART_MAILFILE') (for eg. 'DEMOART_APPENDIX')
| |
test1.pdf test2.pdf
There is only one instance of all possible extension(pdf, gz...) in each part
otherwise we may encounter problems when renaming files.
+ parameters['rename']: if given, all the files in curdir/files
are renamed. parameters['rename'] is of the form:
<PA>elemfilename[re]</PA>* where re is an regexp to select(using
re.sub) what part of the elem file has to be selected.
e.g: <PA>file:TEST_FILE_RN</PA>
+ parameters['documenttype']: if given, other formats are created.
It has 2 possible values: - if "picture" icon in gif format is created
- if "fulltext" ps, gz .... formats are created
+ parameters['paths_and_suffixes']: directories to look into and
corresponding suffix to add to every file inside. It must have
the same structure as a Python dictionnary of the following form
{'FrenchAbstract':'french', 'EnglishAbstract':''}
The keys are the file input element name from the form <=>
directories in curdir/files The values associated are the
suffixes which will be added to all the files in
e.g. curdir/files/FrenchAbstract
+ parameters['iconsize'] need only if 'icon' is selected in
parameters['documenttype']
+ parameters['paths_and_restrictions']: the restrictions to apply
to each uploaded file. The parameter must have the same
structure as a Python dictionnary of the following form:
{'DEMOART_APPENDIX':'restricted'}
Files not specified in this parameter are not restricted.
The specified restrictions can include a variable that can be
replaced at runtime, for eg:
{'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}
+ parameters['paths_and_doctypes']: if a doctype is specified,
the file will be saved under the 'doctype/collection' instead
of under the default doctype/collection given by the name
of the upload element that was used on the websubmit interface.
to configure the doctype in websubmit, enter the value as in a
dictionnary, for eg:
{'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
Demo_Export_Via_Sword [DEMOSWR] Document Types
"""
## NOTE: sysno is injected by the WebSubmit functions sandbox.
global sysno
paths_and_suffixes = parameters['paths_and_suffixes']
paths_and_restrictions = parameters['paths_and_restrictions']
rename = parameters['rename']
documenttype = parameters['documenttype']
iconsizes = parameters['iconsize'].split(',')
paths_and_doctypes = parameters['paths_and_doctypes']
## Create an instance of BibRecDocs for the current recid(sysno)
bibrecdocs = BibRecDocs(sysno)
## The three "dictionary" parameters arrive as strings and must be
## parsed into real dicts first:
paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)
paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)
paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)
## Go through all the directories specified in the keys
## of parameters['paths_and_suffixes']
for path in paths_and_suffixes.keys():
## Check if there is a directory for the current path
if os.path.exists("%s/files/%s" % (curdir, path)):
## Retrieve the restriction to apply to files in this
## directory
restriction = paths_and_restrictions.get(path, '')
## Expand any <PA>...</PA> placeholder inside the restriction:
restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
get_pa_tag_content,
restriction)
## Go through all the files in curdir/files/path
for current_file in os.listdir("%s/files/%s" % (curdir, path)):
## retrieve filename and extension
dummy, filename, extension = decompose_file(current_file)
if extension and extension[0] != ".":
extension = '.' + extension
if len(paths_and_suffixes[path]) != 0:
extension = "_%s%s" % (paths_and_suffixes[path], extension)
## Build the new file name if rename parameter has been given
if rename:
filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
get_pa_tag_content, \
parameters['rename'])
if rename or len(paths_and_suffixes[path]) != 0 :
## Rename the file
try:
# Write the log rename_cmd
fd = open("%s/rename_cmd" % curdir, "a+")
fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
"%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
## Rename
os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
"%s/files/%s/%s%s" % (curdir, path, filename, extension))
fd.close()
## Save the new name in a text file in curdir so that
## the new filename can be used by templates to created the recmysl
fd = open("%s/%s_RENAMED" % (curdir, path), "w")
fd.write("%s%s" % (filename, extension))
fd.close()
except OSError, err:
msg = "Cannot rename the file.[%s]"
msg %= str(err)
raise InvenioWebSubmitFunctionWarning(msg)
fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
## Check if there is any existing similar file
if not bibrecdocs.check_file_exists(fullpath, extension):
bibdoc = bibrecdocs.add_new_file(fullpath, doctype=paths_and_doctypes.get(path, path), never_fail=True)
bibdoc.set_status(restriction)
## Fulltext
if documenttype == "fulltext":
additionalformats = createRelatedFormats(fullpath)
if len(additionalformats) > 0:
for additionalformat in additionalformats:
try:
bibrecdocs.add_new_format(additionalformat)
except InvenioBibDocFileError:
## Format already attached -- best-effort, skip it:
pass
## Icon
elif documenttype == "picture":
## Only the FIRST successfully created icon becomes the
## default icon subformat; later sizes get a suffixed
## subformat name:
has_added_default_icon_subformat_p = False
for iconsize in iconsizes:
try:
iconpath, iconname = create_icon({
'input-file' : fullpath,
'icon-scale' : iconsize,
'icon-name' : None,
'icon-file-format' : None,
'multipage-icon' : False,
'multipage-icon-delay' : 100,
'verbosity' : 0,
})
except Exception, e:
register_exception(prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno), alert_admin=True)
continue
iconpath = os.path.join(iconpath, iconname)
docname = decompose_file(fullpath)[1]
try:
mybibdoc = bibrecdocs.get_bibdoc(docname)
except InvenioBibDocFileError:
mybibdoc = None
if iconpath is not None and mybibdoc is not None:
try:
## Strip ImageMagick geometry modifiers so the
## subformat name stays a clean identifier:
icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
if not has_added_default_icon_subformat_p:
mybibdoc.add_icon(iconpath)
has_added_default_icon_subformat_p = True
else:
mybibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
## Save the new icon filename in a text file in curdir so that
## it can be used by templates to created the recmysl
try:
if not has_added_default_icon_subformat_p:
fd = open("%s/%s_ICON" % (curdir, path), "w")
else:
fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w")
fd.write(os.path.basename(iconpath))
fd.close()
except OSError, err:
msg = "Cannot store icon filename.[%s]"
msg %= str(err)
raise InvenioWebSubmitFunctionWarning(msg)
except InvenioBibDocFileError, e:
# Most probably icon already existed.
pass
elif mybibdoc is not None:
mybibdoc.delete_icon()
# Update the MARC
## NOTE(review): the '--yes-i-know' flag is baked into the "binary
## path" via os.path.join -- it works only because join treats it as a
## plain path component. The recid itself IS properly escaped by
## run_shell_command's %s substitution.
bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s", (str(sysno),))
# Delete the HB BibFormat cache in the DB, so that the fulltext
# links do not point to possible dead files
run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,))
return ""
def get_pa_tag_content(pa_content):
    """Resolve the content of a <PA>XXX</PA> placeholder.

    @param pa_content: MatchObject for <PA>(.*)</PA>.
    @return: the content of the referenced file, possibly filtered by a
        regular expression:
        - pa_content=file[re]:a_file   => first line of a_file matching re
        - pa_content=file*p[re]:a_file => all lines of a_file matching re,
          joined with a dash ('-') character.
        Directives that do not start with 'file' (or point to a missing
        file) yield the empty string.
    """
    directive = pa_content.groupdict()['content']
    result = ''
    if directive.startswith('file'):
        # Split the directive into an optional [regexp] part and the
        # file name that follows it.
        if "[" in directive:
            opening = directive.find("[")
            closing = directive.rfind("]")
            pattern = directive[opening + 1:closing]
            target = directive[closing + 2:]  # skip past "]:"
        else:
            pattern = ""
            target = directive.split(":")[1]
        # curdir is provided by the WebSubmit functions sandbox.
        source_path = os.path.join(curdir, target)
        if os.path.exists(source_path):
            fp = open(source_path, 'r')
            if directive[:5] == "file*":
                # Multi-line mode: filter every line, join with dashes.
                result = '-'.join(
                    [re.split(pattern, line.strip())[-1]
                     for line in fp.readlines()])
            else:
                # Single-line mode: filter the first line only.
                result = re.split(pattern, fp.readline().strip())[-1]
            fp.close()
    return result
diff --git a/invenio/legacy/websubmit/functions/Move_Revised_Files_to_Storage.py b/invenio/legacy/websubmit/functions/Move_Revised_Files_to_Storage.py
index ef90a1517..3fe75c5e5 100644
--- a/invenio/legacy/websubmit/functions/Move_Revised_Files_to_Storage.py
+++ b/invenio/legacy/websubmit/functions/Move_Revised_Files_to_Storage.py
@@ -1,420 +1,420 @@
## $Id: Move_Revised_Files_to_Storage.py,v 1.20 2009/03/26 13:48:42 jerome Exp $
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebSubmit function - Archives uploaded files
TODO:
- Add parameter 'elementNameToFilename' so that files to revise can
be matched by name instead of doctype.
- Icons are created only for uploaded files, but not for related format
created on the fly.
"""
__revision__ = "$Id$"
import time
import os
from invenio.bibdocfile import \
InvenioBibDocFileError, \
BibRecDocs
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.websubmit_icon_creator import \
create_icon, InvenioWebSubmitIconCreatorError
from invenio.config import CFG_BINDIR
from invenio.dbquery import run_sql
from invenio.legacy.websubmit.functions.Shared_Functions import \
createRelatedFormats
from invenio.bibdocfile_managedocfiles import get_description_and_comment
def Move_Revised_Files_to_Storage(parameters, curdir, form, user_info=None):
"""
The function revises the files of a record with the newly uploaded
files.
This function can work only if you can define a mapping from the
WebSubmit element name that uploads the file, to the doctype of
the file. In most cases, the doctype is equivalent to the element
name, or just map to 'Main' doctype. That is typically the case if
you use the Move_Files_to_Storage.py function to upload the files
at submission step. For eg. with the DEMOBOO submission of the
Atlantis Demo site, a file is uploaded thanks to the DEMOBOO_FILE
element/File input, which is mapped to doctype DEMOBOO_FILE.
The function ignores files for which multiple files exist for a
single doctype in the record, or when several files are uploaded
with the same element name. If the record to revise does not have
a corresponding file, the file is inserted
This function is similar to Move_Uploaded_Files_to_Storage.py,
excepted that Move_Uploaded_Files_to_Storage relies on files
uploaded from the web interface created by
Create_Upload_Files_Interface.py, while this function relies on
the files uploaded by a regular WebSubmit page that you have built
from WebSubmit admin:
Regular WebSubmit interface --(upload file)--> Move_Revised_Files_to_Storage.py
Create_Upload_Files_Interface.py --(upload file)--> Move_Uploaded_Files_to_Storage.py
The main advantages of this function over the functions
Create_Upload_Files_Interface.py/Move_Uploaded_Files_to_Storage is
that it lets you customize the display of your submission in the
way you want, which could be simpler for your users if you usually
only upload a few and fixed number of files per record. The
disadvantages are that this function is not capable of : deleting
files, adding an alternative format to a file, add a variable
number of files, does not allow to set permissions at the level of
file, does not support user comments, renaming, etc.
@param parameters:(dictionary) - must contain:
+ elementNameToDoctype: maps an element/field name to a doctype.
Eg. the file uploaded from the
DEMOBOO_FILE element (input file tag)
should revise the file with document
type (doctype) "Main":
DEMOBOO_FILE=Main|DEMOBOO_FILE_2=ADDITIONAL
('=' separates element name and doctype
'|' separates each doctype/element name group)
In most cases, the element name == doctype:
DEMOBOO_FILE=DEMOBOO_FILE|DEMOBOO_FILE_2=DEMOBOO_FILE_2
+ createIconDoctypes: the list of doctypes for which an icon
should be created when revising the file.
Eg:
Figure|Graph
('|' separated values)
Use '*' for all doctypes
+ iconsize: size of the icon to create (when applicable)
+ keepPreviousVersionDoctypes: the list of doctypes for which
the function should keep previous
versions visible when revising a
file.
Eg:
Main|Additional
('|' separated values)
Default is all
+ createRelatedFormats: if uploaded files get converted to
whatever format we can (1) or not (0)
"""
# pylint: disable=E0602
# sysno is defined in the WebSubmit functions sandbox.
global sysno
bibrecdocs = BibRecDocs(int(sysno))
# Wash function parameters
(element_name_and_doctype, create_icon_doctypes, iconsize,
keep_previous_version_doctypes, createRelatedFormats_p) = \
wash_function_parameters(parameters, curdir)
for element_name, doctype in element_name_and_doctype:
_do_log(curdir, "Processing " + element_name)
# Check if there is a corresponding file
# (the element's curdir file holds the uploaded file's name)
file_path = os.path.join(curdir, 'files', element_name,
read_file(curdir, element_name))
if file_path and os.path.exists(file_path):
# Now identify which file to revise
files_in_record = bibrecdocs.list_bibdocs(doctype)
if len(files_in_record) == 1:
# Ok, we can revise
bibdoc_name = bibrecdocs.get_docname(files_in_record[0].id)
revise(bibrecdocs, curdir, sysno, file_path,
bibdoc_name, doctype, iconsize,
create_icon_doctypes,
keep_previous_version_doctypes,
createRelatedFormats_p)
elif len(files_in_record) == 0:
# We must add the file
add(bibrecdocs, curdir, sysno, file_path,
doctype, iconsize, create_icon_doctypes,
createRelatedFormats_p)
else:
# NOTE(review): files_in_record is a list of BibDoc objects,
# so ', '.join(files_in_record) will raise TypeError if this
# branch is ever reached -- presumably the docnames were
# intended here. TODO confirm and fix.
_do_log(curdir, " %s ignored, because multiple files found for same doctype %s in record %s: %s" %\
(element_name, doctype, sysno,
', '.join(files_in_record)))
else:
_do_log(curdir, " No corresponding file found (%s)" % file_path)
# Update the MARC
# NOTE(review): unlike Move_Files_to_Storage, this concatenates sysno
# unquoted into an os.system() shell string (sysno must already be a
# str here, or this raises TypeError) -- consider run_shell_command
# with %s substitution instead.
bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
os.system(bibdocfile_bin + " --fix-marc --recid=" + sysno)
# Delete the HB BibFormat cache in the DB, so that the fulltext
# links do not point to possible dead files
run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,))
# pylint: enable=E0602
def add(bibrecdocs, curdir, sysno, file_path, doctype,
        iconsize, create_icon_doctypes, createRelatedFormats_p):
    """
    Add file_path to the record as a new file of the given doctype,
    optionally creating an icon and additional (converted) formats.

    @param bibrecdocs: (BibRecDocs) - the documents manager of the record.
    @param curdir: (string) - the submission working directory (for logging).
    @param sysno: (string) - the record id, used in error reports.
    @param file_path: (string) - full path of the file to attach.
    @param doctype: (string) - the doctype to assign to the new file.
    @param iconsize: (string) - scaling information used for icon creation.
    @param create_icon_doctypes: (list) - doctypes for which an icon must be
        created ('*' means all doctypes).
    @param createRelatedFormats_p: (int) - whether additional formats should
        be created from the uploaded file when possible.
    """
    try:
        # Add the file itself. never_fail=True lets BibDocFile pick a free
        # docname if the default one is already taken.
        bibdoc = bibrecdocs.add_new_file(file_path,
                                         doctype,
                                         never_fail=True)
        _do_log(curdir, ' Added ' + bibrecdocs.get_docname(bibdoc.id) + ': ' + \
                file_path)
        # Add icon, if this doctype is configured for icon creation.
        iconpath = ''
        if doctype in create_icon_doctypes or \
               '*' in create_icon_doctypes:
            iconpath = _create_icon(file_path, iconsize)
            if iconpath is not None:
                bibdoc.add_icon(iconpath)
                _do_log(curdir, ' Added icon to ' + \
                        bibrecdocs.get_docname(bibdoc.id) + ': ' + iconpath)
        # Automatically create additional formats when
        # possible.
        additional_formats = []
        if createRelatedFormats_p:
            additional_formats = createRelatedFormats(file_path,
                                                      overwrite=False)
        for additional_format in additional_formats:
            bibdoc.add_new_format(additional_format,
                                  bibrecdocs.get_docname(bibdoc.id))
            # Log the path of the format that was added (previously the
            # unrelated icon path was logged here).
            _do_log(curdir, ' Added format ' + additional_format + \
                    ' to ' + bibrecdocs.get_docname(bibdoc.id))
    except InvenioBibDocFileError:
        # Format already existed. How come? We should
        # have checked this in Create_Upload_Files_Interface.py
        # NOTE: %s is used for sysno because it is passed in as a string;
        # the previous %i raised a TypeError while reporting the error.
        register_exception(prefix='Move_Revised_Files_to_Storage ' \
                           'tried to add already existing file %s ' \
                           'to record %s. %s' % \
                           (file_path, sysno, curdir),
                           alert_admin=True)
def revise(bibrecdocs, curdir, sysno, file_path, bibdoc_name, doctype,
           iconsize, create_icon_doctypes,
           keep_previous_version_doctypes, createRelatedFormats_p):
    """
    Revise the given bibdoc with a new file.

    Depending on the doctype, either a new version is added on top of the
    existing ones, or the previous versions are (soft-)deleted and the file
    is added afresh. Description and comment of the latest version are
    preserved, and icons/additional formats are (re)created when configured.

    @param bibrecdocs: (BibRecDocs) - the documents manager of the record.
    @param curdir: (string) - the submission working directory (for logging).
    @param sysno: (string) - the record id, used in error reports.
    @param file_path: (string) - full path of the revised file.
    @param bibdoc_name: (string) - the docname of the bibdoc to revise.
    @param doctype: (string) - the doctype of the revised file.
    @param iconsize: (string) - scaling information used for icon creation.
    @param create_icon_doctypes: (list) - doctypes for which an icon must be
        created ('*' means all doctypes).
    @param keep_previous_version_doctypes: (list) - doctypes for which the
        previous versions must remain visible.
    @param createRelatedFormats_p: (int) - whether additional formats should
        be created from the uploaded file when possible.
    """
    try:
        # Retrieve the current description and comment, or they
        # will be lost when revising
        latest_files = bibrecdocs.list_bibdocs(doctype)[0].list_latest_files()
        prev_desc, prev_comment = get_description_and_comment(latest_files)
        if doctype in keep_previous_version_doctypes:
            # Standard procedure, keep previous version
            bibdoc = bibrecdocs.add_new_version(file_path,
                                                bibdoc_name,
                                                prev_desc,
                                                prev_comment)
            _do_log(curdir, ' Revised ' + bibrecdocs.get_docname(bibdoc.id) + \
                    ' with : ' + file_path)
        else:
            # Soft-delete previous versions, and add new file
            # (we need to get the doctype before deleting)
            if bibrecdocs.has_docname_p(bibdoc_name):
                # Delete only if bibdoc originally
                # existed
                bibrecdocs.delete_bibdoc(bibdoc_name)
                _do_log(curdir, ' Deleted ' + bibdoc_name)
            try:
                bibdoc = bibrecdocs.add_new_file(file_path,
                                                 doctype,
                                                 bibdoc_name,
                                                 never_fail=True,
                                                 description=prev_desc,
                                                 comment=prev_comment)
                _do_log(curdir, ' Added ' + bibrecdocs.get_docname(bibdoc.id) + ': ' + \
                        file_path)
            except InvenioBibDocFileError as e:
                _do_log(curdir, str(e))
                # NOTE: %s is used for sysno because it is passed in as a
                # string; the previous %i raised a TypeError here.
                register_exception(prefix='Move_Uploaded_Files_to_Storage ' \
                                   'tried to revise a file %s ' \
                                   'named %s in record %s. %s' % \
                                   (file_path, bibdoc_name, sysno, curdir),
                                   alert_admin=True)
                # 'bibdoc' was never assigned: without this early return the
                # icon/format code below would crash with a NameError.
                return
        # Add icon
        iconpath = ''
        if doctype in create_icon_doctypes or \
               '*' in create_icon_doctypes:
            iconpath = _create_icon(file_path, iconsize)
            if iconpath is not None:
                bibdoc.add_icon(iconpath)
                _do_log(curdir, 'Added icon to ' + \
                        bibrecdocs.get_docname(bibdoc.id) + ': ' + iconpath)
        # Automatically create additional formats when
        # possible.
        additional_formats = []
        if createRelatedFormats_p:
            additional_formats = createRelatedFormats(file_path,
                                                      overwrite=False)
        for additional_format in additional_formats:
            bibdoc.add_new_format(additional_format,
                                  bibdoc_name,
                                  prev_desc,
                                  prev_comment)
            # Log ("Addeded" typo fixed; log the format path, not the icon)
            _do_log(curdir, ' Added format ' + additional_format + \
                    ' to ' + bibrecdocs.get_docname(bibdoc.id))
    except InvenioBibDocFileError:
        # Format already existed. How come? We should
        # have checked this in Create_Upload_Files_Interface.py
        register_exception(prefix='Move_Revised_Files_to_Storage ' \
                           'tried to revise a file %s ' \
                           'named %s in record %s. %s' % \
                           (file_path, bibdoc_name, sysno, curdir),
                           alert_admin=True)
def wash_function_parameters(parameters, curdir):
    """
    Return the function's (admin-defined) parameters washed and
    initialized properly, as a tuple.

    @param parameters: (dictionary) - the raw admin-defined parameters;
        check the Move_Revised_Files_to_Storage(..) docstring for the keys.
    @param curdir: (string) - the current submission's working directory
        (kept for interface compatibility; not used here).
    @return: tuple (element_name_and_doctype, create_icon_doctypes, iconsize,
             keep_previous_version_doctypes, createRelatedFormats_p)
    """
    # The mapping element name -> doctype.
    # '|' is used to separate mapping groups, and '=' to separate
    # element name and doctype.
    # Eg: DEMOBOO_FILE=Main|DEMOBOO_FILEADDITIONAL=Additional File
    element_name_and_doctype = [mapping.strip().split("=") for mapping \
                                in parameters['elementNameToDoctype'].split('|') \
                                if mapping.strip() != '']
    # The list of doctypes for which we want to create an icon
    # (list of values separated by "|")
    create_icon_doctypes = [doctype.strip() for doctype \
                            in parameters['createIconDoctypes'].split('|') \
                            if doctype.strip() != '']
    # If we should create additional formats when applicable (1) or
    # not (0). Non-numeric values are treated as "no".
    try:
        createRelatedFormats_p = int(parameters['createRelatedFormats'])
    except ValueError:
        createRelatedFormats_p = False
    # Icons size
    iconsize = parameters.get('iconsize')
    # The list of doctypes for which we want to keep previous versions
    # of files visible.
    # (list of values separated by "|")
    keep_previous_version_doctypes = [doctype.strip() for doctype \
                                      in parameters['keepPreviousVersionDoctypes'].split('|') \
                                      if doctype.strip() != '']
    if not keep_previous_version_doctypes:
        # Nothing specified: keep all by default
        keep_previous_version_doctypes = [doctype for (elem, doctype) \
                                          in element_name_and_doctype]
    return (element_name_and_doctype, create_icon_doctypes, iconsize,
            keep_previous_version_doctypes, createRelatedFormats_p)
def _do_log(log_dir, msg):
    """
    Log what we have done, in case something went wrong.
    Nice to compare with bibdocactions.log
    Should be removed when the development is over.

    @param log_dir: (string) - directory in which 'performed_actions.log'
        is appended to (created on first write).
    @param msg: (string) - the message to log, prefixed with a timestamp.
    """
    log_file = os.path.join(log_dir, 'performed_actions.log')
    # The context manager guarantees the descriptor is closed even if the
    # write fails (the previous open/close pair leaked it on error).
    with open(log_file, "a+") as file_desc:
        file_desc.write("%s --> %s\n" % (time.strftime("%Y-%m-%d %H:%M:%S"), msg))
def _create_icon(file_path, icon_size, format='gif', verbosity=9):
    """
    Create an icon of the given file.

    Returns the path to the icon. If creation fails, return None, and
    register the exception (without alerting the admin).

    @param file_path: (string) - full path to the file to iconify.
    @param icon_size: (int) - the scaling information to be used for the
        creation of the new icon.
    @param format: (string) - the image format of the icon (default 'gif').
    @param verbosity: (int) - the verbosity level under which the program
        is to run.
    @return: (string|None) - full path to the created icon, or None.
    """
    icon_path = None
    try:
        filename = os.path.splitext(os.path.basename(file_path))[0]
        (icon_dir, icon_name) = create_icon(
            {'input-file': file_path,
             'icon-name': "icon-%s" % filename,
             'multipage-icon': False,
             'multipage-icon-delay': 0,
             'icon-scale': icon_size,
             'icon-file-format': format,
             'verbosity': verbosity})
        # os.path.join avoids a doubled separator if icon_dir already ends
        # with one (the previous icon_dir + os.sep + icon_name did not).
        icon_path = os.path.join(icon_dir, icon_name)
    except InvenioWebSubmitIconCreatorError as e:
        register_exception(prefix='Icon for file %s could not be created: %s' % \
                           (file_path, str(e)),
                           alert_admin=False)
    return icon_path
def read_file(curdir, filename):
    """
    Read a file in curdir.

    @param curdir: (string) - the directory the file must live in.
    @param filename: (string) - the name of the file to read.
    @return: (string|None) - the file content, or None if the file does not
        exist, cannot be read, or is not really inside curdir (e.g. a
        '../' path-traversal attempt).
    """
    try:
        base_dir = os.path.abspath(curdir)
        file_path = os.path.abspath(os.path.join(base_dir, filename))
        # Anchor the containment check at a directory boundary: a plain
        # startswith(curdir) would also accept sibling directories whose
        # name merely begins with curdir (e.g. '<curdir>2/...').
        if not file_path.startswith(base_dir + os.sep):
            return None
        # open() instead of the Python-2-only file() builtin; the context
        # manager guarantees the descriptor is closed even if read() fails.
        with open(file_path, 'r') as file_desc:
            content = file_desc.read()
    except (IOError, OSError, TypeError):
        # Best-effort helper: any failure to resolve or read yields None.
        content = None
    return content
diff --git a/invenio/legacy/websubmit/functions/Print_Success_APP.py b/invenio/legacy/websubmit/functions/Print_Success_APP.py
index 408e8b17b..c76d7a209 100644
--- a/invenio/legacy/websubmit/functions/Print_Success_APP.py
+++ b/invenio/legacy/websubmit/functions/Print_Success_APP.py
@@ -1,140 +1,140 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Return a message to the user's browser saying that their decision was taken
into account. Intended for use in an approval submission (the referee should
be the recipient of any message created by this function.)
"""
__revision__ = "$Id$"
import os
import cgi
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
def Print_Success_APP(parameters, curdir, form, user_info=None):
    """Return a message to be displayed by the referee's browser after (s)he
       has refereed an item.

    @param parameters: (dictionary) - parameters needed by this function.
        Contains:
          + decision_file: (string) - the name of the file in which the
            referee's decision is stored.
          + newrnin: (string) - the name of the file in which the
            new report number is stored.
    @param curdir: (string) - the current submission's working directory.
    @param form: (dictionary) - submitted form values.
    @param user_info: (dictionary) - information about the user.
    @return: (string) - a message to be displayed by the user's browser.
    """
    global rn ## Unfortunately, it's necessary to use the magic "rn" global:
    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""
    ## If a new report number has been generated, retrieve it:
    try:
        newrnpath = parameters['newrnin']
    except KeyError:
        register_exception()
        newrnpath = ""
    else:
        if newrnpath in (None, "None"):
            newrnpath = ""
    ## basename() defends against a path being configured instead of a
    ## plain filename:
    newrnpath = os.path.basename(newrnpath)
    newrn = ""
    if newrnpath != "" and os.path.exists("%s/%s" % (curdir, newrnpath)):
        try:
            with open("%s/%s" % (curdir, newrnpath), "r") as fp:
                newrn = fp.read()
        except IOError:
            register_exception()
            newrn = ""
    ## Now try to read the decision from the decision_filename:
    if decision_filename in (None, "", "NULL"):
        ## We don't have a name for the decision file.
        ## For backward compatibility reasons, try to read the decision from
        ## a file called 'decision' in curdir:
        decision = ""
        if os.path.exists("%s/decision" % curdir):
            try:
                with open("%s/decision" % curdir, "r") as fh_decision:
                    decision = fh_decision.read().strip()
            except IOError:
                ## Unable to open the decision file
                exception_prefix = "Error in WebSubmit function " \
                                   "Print_Success_APP. Tried to open " \
                                   "decision file [%s/decision] but was " \
                                   "unable to." % curdir
                register_exception(prefix=exception_prefix)
                decision = ""
    else:
        ## Try to read the decision from the decision file:
        try:
            with open("%s/%s" % (curdir, decision_filename), "r") as fh_decision:
                decision = fh_decision.read().strip()
        except IOError:
            ## Oops, unable to open the decision file.
            decision = ""
            exception_prefix = "Error in WebSubmit function " \
                               "Print_Success_APP. Tried to open decision " \
                               "file [%s/%s] but was unable to." \
                               % (curdir, decision_filename)
            register_exception(prefix=exception_prefix)
    ## Create the message:
    if decision != "":
        ## Prefer the newly generated report number when one exists.
        ## (Plain conditional expressions replace the fragile
        ## "cond and a or b" idiom used previously.)
        additional_info_approve = "The item will now be integrated into " \
                                  "the relevant collection with the " \
                                  "reference number <b>%s</b>." \
                                  % (cgi.escape(rn) if newrn == "" \
                                     else cgi.escape(newrn))
        msg = "<br /><div>Your decision was: <b>%(decision)s</b>.<br />\n" \
              "It has been taken into account.<br />\n" \
              "%(additional-info)s</div><br />\n" \
              % { 'decision' : cgi.escape(decision),
                  'additional-info' : (additional_info_approve \
                                       if decision == "approve" else ""),
                }
    else:
        ## Since the decision could not be read from the decision file, we will
        ## just display a generic "thank you for your decision" message.
        ## FIXME: We should really report this to WebSubmit core.
        msg = "<br /><div>Thank you for your decision.</div><br />\n"
    ## Return the message to WebSubmit core.
    return msg
diff --git a/invenio/legacy/websubmit/functions/Register_Approval_Request.py b/invenio/legacy/websubmit/functions/Register_Approval_Request.py
index 0d7648730..630ceb3a2 100644
--- a/invenio/legacy/websubmit/functions/Register_Approval_Request.py
+++ b/invenio/legacy/websubmit/functions/Register_Approval_Request.py
@@ -1,397 +1,397 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""At the time of a "request for approval" submission, register the request in
the WebSubmit "Approvals" DB (sbmAPPROVAL).
"""
__revision__ = "$Id$"
import sre_constants
import os
import cgi
import re
from invenio.websubmit_dblayer import register_new_approval_request, \
get_simple_approval_status, \
update_approval_request_status
from invenio.legacy.websubmit.functions.Shared_Functions import ParamFromFile
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError, \
InvenioWebSubmitFunctionStop
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_SITE_SUPPORT_EMAIL
def Register_Approval_Request(parameters, curdir, form, user_info=None):
    """
    This function is used at the time of a "request for approval" submission
    in order to register the request in the WebSubmit "Approvals" DB
    (sbmAPPROVAL).
    At the time of approval request, the document could be in one of
    several different approval "states" and depending upon that state,
    the action taken by this function differs. The states are as
    follows:
        * Approval for the document has never been requested.
          -> In this case, a new row for the document is inserted into the
             approvals table with the "waiting" state.
        * Approval of the document has previously been requested and it is
          still in the "waiting" state.
          -> In this case, the date of last request for the document is
             updated in the approvals table.
        * Approval of the document has previously been requested, but the
          document was rejected.
          -> In this case, the function will halt the submission with a
             message informing the user that approval of the document was
             already rejected.
        * Approval of the document has previously been requested and it has
          been approved.
          -> In this case, the function will halt the submission with a
             message informing the user that the document has already
             been approved and that no further action is necessary.
        * Approval of the document has previously been requested, but the
          request withdrawn.
          -> In this case, the function will update the "approval status"
             of the document to "waiting" and will return a message
             informing the user that although the approval request was
             previously withdrawn, it has been requested again.
    @param categ_file_appreq: (string) - some document types are
        separated into different categories, each of which has its own
        referee(s).
        In such document types, it's necessary to know the document-
        type's category in order to choose the referee.
        This parameter provides a means by which the category information
        can be extracted from a file in the current submission's working
        directory. It should therefore be a filename.
    @param categ_rnseek_appreq: (string) - some document types are
        separated into different categories, each of which has its own
        referee(s).
        In such document types, it's necessary to know the document-
        type's category in order to choose the referee.
        This parameter provides a means by which the category information
        can be extracted from the document's reference number.
        It is in fact a string that will be compiled into a regexp and
        an attempt will be made to match it against the document's reference
        number starting from the left-most position.
        The only pre-requisite is that the segment in which the category is
        sought should be indicated with <CATEGORY>.
        Thus, an example might be as follows:
           ATL(-COM)?-<CATEGORY>-.+
        This would allow "PHYS" in the following reference number to be
        recognised as the category:
           ATL-COM-PHYS-2008-001
    @param note_file_appreq: (string) - the name of the file from which
        any "notes" to be added into the approval request's "note" field in
        the database are to be read. (File must be in the current submission's
        working directory.)
    @return: (string) - a message for the user.
    @Exceptions raised: + InvenioWebSubmitFunctionStop when the submission
                          should be halted.
                        + InvenioWebSubmitFunctionError when an unexpected
                          error has been encountered and execution cannot
                          continue.
    """
    ## Get the reference number (as global rn - sorry!) and the document type:
    global rn
    doctype = form['doctype']
    ## A string variable to contain any information that should be displayed
    ## in the user's browser:
    info_out = ""
    ########
    ## Get the parameters from the list:
    ########
    ## Get the name of the category file:
    #######
    try:
        ## If it has been provided, get the name of the file in which the
        ## category is stored:
        category_file = parameters["categ_file_appreq"]
    except KeyError:
        ## No value given for the category file:
        category_file = None
    else:
        if category_file is not None:
            category_file = str(category_file)
            ## basename() guards against a path being configured here
            ## instead of a plain filename:
            category_file = os.path.basename(category_file).strip()
            if category_file == "":
                category_file = None
    ########
    ## Get the name of the "note" file and read its value:
    ########
    note = "" ## variable to hold a note to be added to the approval request's
              ## details in the DB.
    try:
        note_file = parameters["note_file_appreq"]
    except KeyError:
        ## No value given for the note file:
        note_file = None
    else:
        if note_file is not None:
            note_file = str(note_file)
            note_file = os.path.basename(note_file).strip()
            if note_file == "":
                note_file = None
    if note_file is not None:
        note = ParamFromFile("%s/%s" % (curdir, note_file))
    ########
    ## Get the regexp that is used to find the category in the report number:
    ########
    try:
        ## If it has been provided, get the regexp used for identifying
        ## a document-type's category from its reference number:
        category_rn_regexp = parameters["categ_rnseek_appreq"]
    except KeyError:
        ## No value given for the category regexp:
        category_rn_regexp = None
    else:
        if category_rn_regexp is not None:
            category_rn_regexp = str(category_rn_regexp).strip()
            if category_rn_regexp == "":
                category_rn_regexp = None
    #######
    ## Resolve the document type's category:
    ##
    ## This is a long process. The end result is that the category is extracted
    ## either from a file in curdir, or from the report number.
    ## If it's taken from the report number, the admin must configure the
    ## function to accept a regular expression that is used to find the
    ## category in the report number.
    ##
    if category_file is not None and category_rn_regexp is not None:
        ## It is not valid to have both a category file and a pattern
        ## describing how to extract the category from a report number.
        ## raise an InvenioWebSubmitFunctionError
        msg = "Error in Register_Approval_Request function: received " \
              "instructions to search for the document's category in " \
              "both its report number AND in a category file. Could " \
              "not determine which to use - please notify the " \
              "administrator."
        raise InvenioWebSubmitFunctionError(msg)
    elif category_file is not None:
        ## Attempt to recover the category information from a file in the
        ## current submission's working directory:
        category = ParamFromFile("%s/%s" % (curdir, category_file))
        if category is not None:
            category = category.strip()
        if category in (None, ""):
            ## The category cannot be resolved.
            msg = "Error in Register_Approval_Request function: received " \
                  "instructions to search for the document's category in " \
                  "a category file, but could not recover the category " \
                  "from that file. An approval request therefore cannot " \
                  "be registered for the document."
            raise InvenioWebSubmitFunctionError(msg)
    elif category_rn_regexp is not None:
        ## Attempt to recover the category information from the document's
        ## reference number using the regexp in category_rn_regexp:
        ##
        ## Does the category regexp contain the key-phrase "<CATEG>"?
        if category_rn_regexp.find("<CATEG>") != -1:
            ## Yes. Replace "<CATEG>" with "(?P<category>.+?)".
            ## For example, this:
            ##    ATL(-COM)?-<CATEG>-
            ## Will be transformed into this:
            ##    ATL(-COM)?-(?P<category>.+?)-
            category_rn_final_regexp = \
                category_rn_regexp.replace("<CATEG>", r"(?P<category>.+?)", 1)
        else:
            ## The regexp for category didn't contain "<CATEG>", but this is
            ## mandatory.
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression (no marker for " \
                  "the category was present.) Since the document's category " \
                  "therefore cannot be retrieved, an approval request cannot " \
                  "be registered for it. Please report this problem to the " \
                  "administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        ##
        try:
            ## Attempt to compile the regexp for finding the category:
            re_categ_from_rn = re.compile(category_rn_final_regexp)
        except sre_constants.error:
            ## The expression passed to this function could not be compiled
            ## into a regexp. Register this exception and raise an
            ## InvenioWebSubmitFunctionError:
            exception_prefix = "Error in Register_Approval_Request function: " \
                               "The [%(doctype)s] submission has been " \
                               "configured to search for the document type's " \
                               "category in its reference number, using the " \
                               "following regexp: /%(regexp)s/. This regexp, " \
                               "however, could not be compiled correctly " \
                               "(created it from %(categ-search-term)s.)" \
                               % { 'doctype' : doctype, \
                                   'regexp' : category_rn_final_regexp, \
                                   'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression. Since the " \
                  "document's category therefore cannot be retrieved, an " \
                  "approval request cannot be registered for it. Please " \
                  "report this problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        else:
            ## Now attempt to recover the category from the RN string:
            m_categ_from_rn = re_categ_from_rn.match(rn)
            if m_categ_from_rn is not None:
                ## The pattern matched in the string.
                ## Extract the category from the match:
                try:
                    category = m_categ_from_rn.group("category")
                except IndexError:
                    ## There was no "category" group. That group is mandatory.
                    exception_prefix = \
                       "Error in Register_Approval_Request function: The " \
                       "[%(doctype)s] submission has been configured to " \
                       "search for the document type's category in its " \
                       "reference number using the following regexp: " \
                       "/%(regexp)s/. The search produced a match, but " \
                       "there was no \"category\" group in the match " \
                       "object although this group is mandatory. The " \
                       "regexp was compiled from the following string: " \
                       "[%(categ-search-term)s]." \
                       % { 'doctype' : doctype, \
                           'regexp' : category_rn_final_regexp, \
                           'categ-search-term' : category_rn_regexp, }
                    register_exception(prefix=exception_prefix)
                    msg = "Error in Register_Approval_Request function: The " \
                          "[%(doctype)s] submission has been configured to " \
                          "search for the document type's category in its " \
                          "reference number, using a poorly formed search " \
                          "expression (there was no category marker). Since " \
                          "the document's category therefore cannot be " \
                          "retrieved, an approval request cannot be " \
                          "registered for it. Please report this problem to " \
                          "the administrator." \
                          % { 'doctype' : doctype, }
                    raise InvenioWebSubmitFunctionError(msg)
                else:
                    category = category.strip()
                    if category == "":
                        msg = "Error in Register_Approval_Request function: " \
                              "The [%(doctype)s] submission has been " \
                              "configured to search for the document type's " \
                              "category in its reference number, but no " \
                              "category was found. The request for approval " \
                              "cannot be registered. Please report this " \
                              "problem to the administrator." \
                              % { 'doctype' : doctype, }
                        raise InvenioWebSubmitFunctionError(msg)
            else:
                ## No match. Cannot find the category and therefore cannot
                ## continue:
                msg = "Error in Register_Approval_Request function: The " \
                      "[%(doctype)s] submission has been configured to " \
                      "search for the document type's category in its " \
                      "reference number, but no match was made. The request " \
                      "for approval cannot be registered. Please report " \
                      "this problem to the administrator." \
                      % { 'doctype' : doctype, }
                raise InvenioWebSubmitFunctionError(msg)
    else:
        ## The document type has no category.
        category = ""
    ##
    ## End of category recovery
    #######
    #######
    ##
    ## Query the "approvals" DB table to determine whether approval of this
    ## document has already been requested:
    approval_status = get_simple_approval_status(doctype, rn)
    if approval_status is None:
        ## Approval has never been requested for this document. Register the
        ## new request.
        register_new_approval_request(doctype, category, rn, note)
    elif approval_status.lower() == "approved":
        ## This document has already been approved. Stop and inform the user
        ## of this.
        msg = """
<br />
<div>
<span style="color: red;">Note:</span> The document %s has already been
Approved.<br />
No further approval is necessary - no further action will be taken.
</div>
""" % cgi.escape(rn)
        raise InvenioWebSubmitFunctionStop(msg)
    elif approval_status.lower() == "rejected":
        ## This document has already been rejected. Stop and inform the user
        ## of this.
        msg = """
<br />
<div>
<span style="color: red;">Note:</span> Approval of the document [%s] has
previously been rejected.<br />
Approval has NOT been resubmitted and no further action will be taken.<br />
If you believe this to be an error, please contact %s, quoting the<br />
document's report-number [%s] and describing the problem.
</div>
""" % (cgi.escape(rn), cgi.escape(CFG_SITE_SUPPORT_EMAIL), cgi.escape(rn))
        raise InvenioWebSubmitFunctionStop(msg)
    elif approval_status.lower() == "withdrawn":
        ## An approval request for this document type was already made at some
        ## point. Update it and inform the user that the approval request has
        ## been logged despite having been previously withdrawn:
        update_approval_request_status(doctype, rn, note=note)
        info_out += """
<br />
<div>
<span style="color: red;">Note:</span> An approval request for this document
had previously been withdrawn.<br />
Approval has been requested again.
</div>
"""
    elif approval_status.lower() == "waiting":
        ## An approval request for this document has already been registered
        ## but it is awaiting a decision.
        ## Update the date/time of the last request and inform the user that
        ## although approval had already been requested for this document,
        ## their approval request has been made again.
        update_approval_request_status(doctype, rn, note=note)
        info_out += """
<br />
<div>
<span style="color: red;">Note:</span> Although a request for the approval
of this document had already been submitted, your new request has been
registered.<br />
</div>
"""
    else:
        ## The document had an unrecognised "status". Raise an error.
        msg = "Error in Register_Approval_Request function: The " \
              "[%(reportnum)s] document has an unknown approval status " \
              "(%(status)s). Unable to request its approval. Please report " \
              "this problem to the administrator." \
              % { 'reportnum' : rn,
                  'status' : approval_status, }
        raise InvenioWebSubmitFunctionError(msg)
    ##
    ## Finished - return any message to be displayed on the user's screen.
    return info_out
diff --git a/invenio/legacy/websubmit/functions/Second_Report_Number_Generation.py b/invenio/legacy/websubmit/functions/Second_Report_Number_Generation.py
index 301cec303..58f4db951 100644
--- a/invenio/legacy/websubmit/functions/Second_Report_Number_Generation.py
+++ b/invenio/legacy/websubmit/functions/Second_Report_Number_Generation.py
@@ -1,380 +1,380 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""This function is based upon Report_Number_Generation.
Whereas Report_Number_Generation is used to generate the FIRST report-number
for a record, this function should be used to generate the SECOND report
number, should one be required.
A good example of its use may be when a document has been approved and is
given another report number after approval.
The generated report number will be saved into a file (name specified in one
of the function's parameters) in the submission's working directory.
"""
__revision__ = "$Id$"
import cgi
import re
import os
import time
from invenio.config import CFG_SITE_SUPPORT_EMAIL
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.legacy.websubmit.functions.Report_Number_Generation import create_reference
def Second_Report_Number_Generation(parameters, curdir, form, user_info=None):
    """
    This function's task is to generate a SECONDARY report number.
    Some document types require more than one report number.  The function
    "Report_Number_Generation" should be used to generate the PRIMARY
    report number (for various reasons, including the fact that that
    function populates the global "rn" with the document's main report
    number).  This function then, should be used to generate the secondary
    report number.
    This function doesn't populate any global variable with the secondary
    report number, meaning that it should only be called if a secondary
    report number actually needs to be generated by the system (i.e. not
    if the user is to supply this secondary report number via the submission
    form.)
    A use case for this function could be when a document is approved as
    an official note of some sort.  Before approval for example, it could
    be classed as a Communication and have its own "Communication number".
    At approval time however, it could be given a new "official note"
    number, and this function could be used to generate the new number.
    So in short, the function behaves rather like Report_Number_Generation
    and was in fact based upon that function.  It:
         + Creates a "report number" according to some format and using some
           counter files.
         + Writes that new "report number" to a file in the submission's
           working directory.
           (** If the "second report number" file already exists in the
           submission's working directory, it merely exits silently.)

    Parameters:

       @param 2nd_counterpath: (string) - the path to the counter file that
        is used to create the report number.
        The counter path can make use of <PA></PA> in order to specify
        some value that should be included in the path:
           <PA>yy</PA> --> Include the year in the path
           <PA>categ</PA> --> Include the submission's category in the path.
           <PA>file[re]:name_of_file[regular expression to match]</PA> -->
           Include the first line of file (in curdir), matching [re]
           <PA>file*[re]:name_of_file [regular expression to match]</PA> -->
           Include all the lines of a file (in curdir), matching [re]
           separated by - (dash) char.

       @param 2nd_rn_file: (string) - the name of the file that is to be
        created containing the secondary report number.  The file will be
        created in the submission's working directory.

       @param 2nd_rn_format: (string) - The format according to which the
        secondary report number will be created.

       @param 2nd_rncateg_file: (string) - the name of the file (in the
        submission's working directory) that contains the category of the
        document.
        The value in this file can be put into the report number by
        including <PA>categ</PA> anywhere that it is needed in the report-
        number format.

       @param 2nd_rn_yeargen: (string) - the instruction used for generating
        the year if one is to be used in the report number.
        The parameter should take either the value "AUTO" - in which case
        the year component of the report number will be a 4-digit
        representation for the current year - or - the name of a file in
        the submission's working directory that contains the year that should
        be included in the report number.
        Note, if the parameter contains the name of a file, it will be assumed
        that if the length of its contents is 10 chars, its value will be a
        date in the format "dd/mm/yyyy" and the last 4 characters will be
        taken as the year.  Otherwise if the length is not 10 characters, it
        will be assumed that the file simply contained a year and its
        contents will be taken as-is.  If the file cannot be opened, an
        InvenioWebSubmitFunctionError exception will be raised.  If no value
        is provided for this parameter, the year in the format YYYY will be
        used.
        The value that is finally created using this parameter for year will
        be used in the final report number anywhere that the format contains
        <PA>yy</PA>.

          Note:
          Tags that use <PA></PA> can take values as follows:
             <PA>yy</PA> --> Include the year (as determined by 2nd_rn_yeargen).
             <PA>categ</PA> --> Include the submission's category.
             <PA>file[re]:name_of_file[regular expression to match]</PA> -->
             Include the first line of file (in curdir), matching [re]
             <PA>file*[re]:name_of_file [regular expression to match]</PA> -->
             Include all the lines of a file (in curdir), matching [re]
             separated by - (dash) char.

       @param 2nd_nb_length: (string) the number of digits for the
        report number. Eg: '3' for XXX-YYYY-025 or '4' for
        XXX-YYYY-0025. If more are needed (all available digits have
        been used), the length is automatically extended. Choose 1 to
        never have leading zeros. Default length: 3.

       @return: (string) - empty string.

       @Exceptions raised: InvenioWebSubmitFunctionError - upon unexpected
        error.
    """
    ######################
    ## Internal function definition:
    ######################
    def get_pa_tag_content(pa_content):
        """Get content for <PA>XXX</PA>.
        @param pa_content: MatchObject for <PA>(.*)</PA>.
        return: if pa_content=yy => 4 digits year
                if pa_content=categ => category
                if pa_content=file[re]:a_file => first line of file a_file matching re
                if pa_content=file*p[re]:a_file => all lines of file a_file, matching re,
                separated by - (dash) char.
        """
        pa_content = pa_content.groupdict()['content']
        sep = '-'
        out = ''
        if pa_content == 'yy':
            out = yy
        elif pa_content == 'categ':
            out = category
        elif pa_content.startswith('file'):
            filename = ""
            regexp = ""
            if "[" in pa_content:
                split_index_start = pa_content.find("[")
                split_index_stop = pa_content.rfind("]")
                regexp = pa_content[split_index_start+1:split_index_stop]
                ## Skip the closing "]" and the ":" separator:
                filename = pa_content[split_index_stop+2:]
            else:
                filename = pa_content.split(":")[1]
            if os.path.exists(os.path.join(curdir, filename)):
                fp = open(os.path.join(curdir, filename), 'r')
                try:
                    if pa_content[:5] == "file*":
                        ## All lines, each reduced to the last regexp-split
                        ## component, joined by dashes:
                        out = sep.join(map(lambda x: re.split(regexp, x.strip())[-1], fp.readlines()))
                    else:
                        ## Only the first line:
                        out = re.split(regexp, fp.readline().strip())[-1]
                finally:
                    ## Always release the file handle:
                    fp.close()
        return out
    ######################
    ## End of internal function definition:
    ######################
    document_type = form['doctype']
    access_number = form['access']

    ############
    ## Get parameter values and sanitize them:
    ############
    ############
    ## Report number length
    ############
    new_nb_length = 3
    if '2nd_nb_length' in parameters and \
           parameters['2nd_nb_length'].isdigit():
        new_nb_length = int(parameters['2nd_nb_length'])
    ############
    ## Category file name - when category is included in the new report number
    ############
    try:
        new_rn_categ_filename = parameters['2nd_rncateg_file']
    except KeyError:
        new_rn_categ_filename = ""
    else:
        if new_rn_categ_filename is None:
            new_rn_categ_filename = ""
    ## Get the "basename" for the report-number file:
    new_rn_categ_filename = os.path.basename(new_rn_categ_filename).strip()
    if new_rn_categ_filename != "" and \
           os.path.exists("%s/%s" % (curdir, new_rn_categ_filename)):
        try:
            fh_category = open("%s/%s" % (curdir, new_rn_categ_filename), "r")
            category = fh_category.read()
            fh_category.close()
        except IOError:
            ## Log the problem but carry on with an empty category.
            ## (Bug fix: previously "category" was left undefined here,
            ## which made the later <PA>categ</PA> substitution crash
            ## with a NameError.)
            register_exception()
            category = ""
        else:
            ## No newlines in category:
            category = category.replace("\n", "").replace("\r", "")
    else:
        category = ""
    ############
    ## Get the details of whether to automatically generate the year, or
    ## whether to get it from a file (if the report number uses a year.
    ############
    try:
        new_rn_yeargen = parameters['2nd_rn_yeargen']
    except KeyError:
        ## (Bug fix: this previously caught IOError, which a dictionary
        ## lookup never raises - a missing parameter used to crash the
        ## function with an uncaught KeyError.)
        new_rn_yeargen = ""
    else:
        if new_rn_yeargen is None:
            new_rn_yeargen = ""
    if new_rn_yeargen == "AUTO":
        ## If the function is configured to automatically generate the year,
        ## it should take the format "YYYY" (e.g. 2008). It should also be the
        ## current year:
        yy = time.strftime("%Y")
    elif new_rn_yeargen != "":
        ## Apparently, the value to be used for year should be taken from a
        ## file.
        new_rn_yeargen = os.path.basename(new_rn_yeargen).strip()
        if new_rn_yeargen != "" and \
               os.path.exists("%s/%s" % (curdir, new_rn_yeargen)):
            try:
                fh_year = open("%s/%s" % (curdir, new_rn_yeargen), "r")
                yy = fh_year.read()
                fh_year.close()
            except IOError:
                err_msg = "Error in Second_Report_Number_Generation: It " \
                          "wasn't possible to open the file containing " \
                          "the year: [%s]. Please report this problem to " \
                          "[%s]." % (cgi.escape(new_rn_yeargen), \
                                     cgi.escape(CFG_SITE_SUPPORT_EMAIL))
                register_exception(prefix=err_msg)
                raise InvenioWebSubmitFunctionError(err_msg)
            else:
                ## It is assumed that the contents of the date file will be
                ## either the year (in the format YYYY) or the date (in the
                ## format DD/MM/YYYY).  If it is 10 chars in length, we take
                ## the last 4, assuming that they are the year component of
                ## the date.  If not, we take the whole string, assuming that
                ## it is just the year anyway.
                yy = yy.strip()
                if len(yy) == 10:
                    yy = yy[-4:]
        elif new_rn_yeargen != "":
            ## Although a "valid" filename for the 2nd_rn_yeargen parameter had
            ## been provided, the file didn't exist.
            err_msg = "Error in Second_Report_Number_Generation: It " \
                      "wasn't possible to open the file containing " \
                      "the year: [%s]. Please report this problem to " \
                      "[%s]." % (cgi.escape(new_rn_yeargen), \
                                 cgi.escape(CFG_SITE_SUPPORT_EMAIL))
            raise InvenioWebSubmitFunctionError(err_msg)
        else:
            ## The filename provided for the 2nd_rn_yeargen parameter was
            ## invalid.
            err_msg = "Error in Second_Report_Number_Generation: The " \
                      "function has been configured with an invalid " \
                      "filename for the year (2nd_rn_yeargen). Please " \
                      "report this problem to [%s], quoting the document " \
                      "type [%s]." \
                      % (cgi.escape(CFG_SITE_SUPPORT_EMAIL), \
                         cgi.escape(document_type))
            raise InvenioWebSubmitFunctionError(err_msg)
    else:
        ## No value for the year-generation parameter. Just use the current
        ## year.
        yy = time.strftime("%Y")
    ############
    ## Counter Path:
    ############
    try:
        new_rn_counter_path = parameters['2nd_counterpath']
    except KeyError:
        new_rn_counter_path = ""
    else:
        if new_rn_counter_path is None:
            new_rn_counter_path = ""
    ## Expand any <PA>...</PA> tags in the counter path:
    counter_path = re.sub('<PA>(?P<content>[^<]*)</PA>',
                          get_pa_tag_content,
                          new_rn_counter_path)
    counter_path = counter_path.replace(" ", "").replace("\n", "")
    ## Counter path isn't allowed to contain "../" (no moving below the
    ## counters directory) and must not be empty. If either of these cases
    ## is true, it is considered to be an error:
    if counter_path == "" or counter_path.find("../") != -1:
        ## Invalid counter path.
        err_msg = "Error in Second_Report_Number_Generation: The function " \
                  "has been configured with an invalid value for " \
                  "2nd_counterpath. Please report this problem to " \
                  "[%s]." % cgi.escape(CFG_SITE_SUPPORT_EMAIL)
        raise InvenioWebSubmitFunctionError(err_msg)
    ############
    ## New Report Number's File Name:
    ############
    try:
        new_rn_filename = parameters['2nd_rn_file']
    except KeyError:
        new_rn_filename = ""
    else:
        if new_rn_filename is None:
            new_rn_filename = ""
    ## Get the "basename" for the report-number file:
    new_rn_filename = os.path.basename(new_rn_filename).strip()
    if new_rn_filename == "":
        ## No file name provided for the new report-number. This is
        ## considered to be an error.
        err_msg = "Error in Second_Report_Number_Generation: The function " \
                  "has been configured with an invalid value for " \
                  "2nd_rn_file. Please report this problem to " \
                  "[%s]." % cgi.escape(CFG_SITE_SUPPORT_EMAIL)
        raise InvenioWebSubmitFunctionError(err_msg)
    ############
    ## Report Number Format:
    ############
    try:
        new_rn_format = parameters['2nd_rn_format']
    except KeyError:
        new_rn_format = ""
    else:
        if new_rn_format is None:
            new_rn_format = ""
    ## Expand any <PA>...</PA> tags in the report-number format:
    new_rn_format = re.sub('<PA>(?P<content>[^<]*)</PA>',
                           get_pa_tag_content,
                           new_rn_format)
    ############
    ## End of treatment of parameters.
    ############

    ############
    ## Test to see whether the second report number file already exists:
    if not os.path.exists("%s/%s" % (curdir, new_rn_filename)):
        ## The new report number file doesn't exist. Create it.
        new_rn = create_reference(counter_path, new_rn_format, new_nb_length)
        ## Strip all whitespace from the generated reference:
        new_rn = re.compile(r'\s').sub('', new_rn)
        ## Write it to file:
        # The file edsrn is created in the submission directory, and it stores the report number
        try:
            fh_new_rn_file = open("%s/%s" % (curdir, new_rn_filename), "w")
            fh_new_rn_file.write(new_rn)
            fh_new_rn_file.flush()
            fh_new_rn_file.close()
        except IOError:
            ## Unable to create the new report-number's file.
            err_msg = "Error in Second_Report_Number_Generation: It " \
                      "wasn't possible to write out the newly generated " \
                      "'second' report number (%s) to the file [%s]. " \
                      "Please report this problem to [%s], quoting the " \
                      "document type [%s], the submission access number " \
                      "[%s] and the new report number [%s]."
            register_exception(prefix=err_msg % (new_rn, \
                                                 new_rn_filename, \
                                                 CFG_SITE_SUPPORT_EMAIL, \
                                                 document_type, \
                                                 access_number, \
                                                 new_rn))
            raise InvenioWebSubmitFunctionError(err_msg % \
                                                (cgi.escape(new_rn), \
                                                 cgi.escape(new_rn_filename), \
                                                 cgi.escape(CFG_SITE_SUPPORT_EMAIL), \
                                                 cgi.escape(document_type), \
                                                 cgi.escape(access_number), \
                                                 cgi.escape(new_rn)))
    return ""
diff --git a/invenio/legacy/websubmit/functions/Send_APP_Mail.py b/invenio/legacy/websubmit/functions/Send_APP_Mail.py
index 4b2e849b5..037209eb6 100644
--- a/invenio/legacy/websubmit/functions/Send_APP_Mail.py
+++ b/invenio/legacy/websubmit/functions/Send_APP_Mail.py
@@ -1,278 +1,278 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
## Description: function Send_APP_Mail
## This function send an email informing the original
## submitter of a document that the referee has approved/
## rejected the document. The email is also sent to the
## referee for checking.
## Author: T.Baron
## PARAMETERS:
## newrnin: name of the file containing the 2nd reference
## addressesAPP: email addresses to which the email will
## be sent (additionally to the author)
## categformatAPP: variable needed to derive the addresses
## mentioned above
import os
import re
from invenio.config import CFG_SITE_NAME, \
CFG_SITE_URL, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_CERN_SITE, \
CFG_SITE_RECORD
from invenio.access_control_admin import acc_get_role_users, acc_get_role_id
from invenio.dbquery import run_sql
from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.search_engine import print_record
from invenio.ext.email import scheduled_send_email
from invenio.bibtask import bibtask_allocate_sequenceid
## The field in which to search for the record submitter/owner's email address:
if CFG_CERN_SITE:
## This is a CERN site - we use 859__f for submitter/record owner's email:
CFG_WEBSUBMIT_RECORD_OWNER_EMAIL = "859__f"
else:
## Non-CERN site. Use 8560_f for submitter/record owner's email:
CFG_WEBSUBMIT_RECORD_OWNER_EMAIL = "8560_f"
def Send_APP_Mail (parameters, curdir, form, user_info=None):
    """
    This function send an email informing the original submitter of a
    document that the referee has approved/ rejected the document. The
    email is also sent to the referee for checking.

    Parameters:

       * addressesAPP: email addresses of the people who will receive
         this email (comma separated list). this parameter may contain
         the <CATEG> string. In which case the variable computed from
         the [categformatAFP] parameter replaces this string.
         eg.: "<CATEG>-email@cern.ch"

       * categformatAPP contains a regular expression used to compute
         the category of the document given the reference of the
         document.
         eg.: if [categformatAFP]="TEST-<CATEG>-.*" and the reference
         of the document is "TEST-CATEGORY1-2001-001", then the computed
         category equals "CATEGORY1"

       * newrnin: Name of the file containing the 2nd reference of the
         approved document (if any).

       * edsrn: Name of the file containing the reference of the
         approved document.

    @return: (string) - empty string.
    """
    ## NOTE(review): this function relies on module-level globals that
    ## the WebSubmit engine is expected to have populated before the
    ## call (titlevalue, authorvalue, emailvalue, sysno, rn) - confirm
    ## that the calling engine always sets them.
    global titlevalue,authorvalue, emailvalue,sysno,rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,CFG_SITE_SUPPORT_EMAIL)
    ## Sequence id used to keep the scheduled mail-sending bibtasklet
    ## ordered with the other tasks of this submission:
    sequence_id = bibtask_allocate_sequenceid(curdir)
    doctype = form['doctype']
    ## Flatten multi-line title/author values for inclusion in the mail body:
    titlevalue = titlevalue.replace("\n"," ")
    authorvalue = authorvalue.replace("\n","; ")
    # variables declaration
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""
    ## Get the name of the comments file:
    try:
        comments_filename = parameters['comments_file']
    except KeyError:
        comments_filename = ""

    ## Now try to read the comments from the comments_filename:
    if comments_filename in (None, "", "NULL"):
        ## We don't have a name for the comments file.
        ## For backward compatibility reasons, try to read the comments from
        ## a file called 'COM' in curdir:
        if os.path.exists("%s/COM" % curdir):
            try:
                fh_comments = open("%s/COM" % curdir, "r")
                comment = fh_comments.read()
                fh_comments.close()
            except IOError:
                ## Unable to open the comments file
                exception_prefix = "Error in WebSubmit function " \
                                   "Send_APP_Mail. Tried to open " \
                                   "comments file [%s/COM] but was " \
                                   "unable to." % curdir
                register_exception(prefix=exception_prefix)
                comment = ""
            else:
                comment = comment.strip()
        else:
            comment = ""
    else:
        ## Try to read the comments from the comments file:
        if os.path.exists("%s/%s" % (curdir, comments_filename)):
            try:
                fh_comments = open("%s/%s" % (curdir, comments_filename), "r")
                comment = fh_comments.read()
                fh_comments.close()
            except IOError:
                ## Oops, unable to open the comments file.
                comment = ""
                exception_prefix = "Error in WebSubmit function " \
                                   "Send_APP_Mail. Tried to open comments " \
                                   "file [%s/%s] but was unable to." \
                                   % (curdir, comments_filename)
                register_exception(prefix=exception_prefix)
            else:
                comment = comment.strip()
        else:
            comment = ""

    ## Now try to read the decision from the decision_filename:
    if decision_filename in (None, "", "NULL"):
        ## We don't have a name for the decision file.
        ## For backward compatibility reasons, try to read the decision from
        ## a file called 'decision' in curdir:
        if os.path.exists("%s/decision" % curdir):
            try:
                fh_decision = open("%s/decision" % curdir, "r")
                decision = fh_decision.read()
                fh_decision.close()
            except IOError:
                ## Unable to open the decision file
                exception_prefix = "Error in WebSubmit function " \
                                   "Send_APP_Mail. Tried to open " \
                                   "decision file [%s/decision] but was " \
                                   "unable to." % curdir
                register_exception(prefix=exception_prefix)
                decision = ""
            else:
                decision = decision.strip()
        else:
            decision = ""
    else:
        ## Try to read the decision from the decision file:
        ## (NOTE: unlike the comments file above, there is no
        ## os.path.exists() pre-check here - a missing file is simply
        ## handled by the IOError branch.)
        try:
            fh_decision = open("%s/%s" % (curdir, decision_filename), "r")
            decision = fh_decision.read()
            fh_decision.close()
        except IOError:
            ## Oops, unable to open the decision file.
            decision = ""
            exception_prefix = "Error in WebSubmit function " \
                               "Send_APP_Mail. Tried to open decision " \
                               "file [%s/%s] but was unable to." \
                               % (curdir, decision_filename)
            register_exception(prefix=exception_prefix)
        else:
            decision = decision.strip()
    ## Read the new (2nd) reference of the document, if one was generated:
    if os.path.exists("%s/%s" % (curdir,newrnpath)):
        fp = open("%s/%s" % (curdir,newrnpath) , "r")
        newrn = fp.read()
        fp.close()
    else:
        newrn = ""
    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s", (doctype,))
    docname = res[0][0]
    # retrieve category
    ## Turn the configured <CATEG> placeholder into a capture group and
    ## match it against the document's (global) report number:
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None:
        if len(m_categ_search.groups()) > 0:
            ## Found a match for the category of this document. Get it:
            category = m_categ_search.group(1)
        else:
            ## This document has no category.
            category = "unknown"
    else:
        category = "unknown"

    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system in CERN now works with listbox membership.
        ## List names should take the format
        ## "service-cds-referee-doctype-category@cern.ch"
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            referee_listname += "-%s" % category.lower()
        referee_listname += "@cern.ch"
        addresses = referee_listname
    else:
        # Build referee's email address
        refereeaddress = ""
        # Try to retrieve the referee's email from the referee's database
        for user in acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype,category))):
            refereeaddress += user[1] + ","
        # And if there is a general referee
        for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
            refereeaddress += user[1] + ","
        refereeaddress = re.sub(",$","",refereeaddress)
        # Creation of the mail for the referee
        otheraddresses = otheraddresses.replace("<CATEG>",category)
        addresses = ""
        if refereeaddress != "":
            addresses = refereeaddress + ","
        if otheraddresses != "":
            addresses += otheraddresses
        else:
            addresses = re.sub(",$","",addresses)

    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        record_owners_list = record_owners.split("\n")
        record_owners_list = [email.lower().strip() \
                              for email in record_owners_list]
    else:
        #if the record owner can not be retrieved from the metadata
        #(in case the record has not been inserted yet),
        #try to use the global variable emailvalue
        try:
            record_owners_list = [emailvalue]
        except NameError:
            record_owners_list = []
    record_owners = ",".join([owner for owner in record_owners_list])
    if record_owners != "":
        addresses += ",%s" % record_owners

    ## Compose the notification subject/body depending on the decision:
    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The %s %s has been approved." % (docname,rn)
        mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % (CFG_SITE_URL,CFG_SITE_RECORD,sysno)
    else:
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname,rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue,authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment
    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR, addresses, mailtitle, mailbody,
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,
                             other_bibtasklet_arguments=['-I', str(sequence_id)])
    return ""
diff --git a/invenio/legacy/websubmit/functions/Send_Delete_Mail.py b/invenio/legacy/websubmit/functions/Send_Delete_Mail.py
index 787d28596..b9107a7db 100644
--- a/invenio/legacy/websubmit/functions/Send_Delete_Mail.py
+++ b/invenio/legacy/websubmit/functions/Send_Delete_Mail.py
@@ -1,159 +1,159 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""The function in this module sends a mail to the user (and admins if
required) saying that a record has been deleted from the repository.
"""
__revision__ = "$Id$"
import os
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webuser import email_valid_p
from invenio.config import CFG_SITE_SUPPORT_EMAIL, CFG_SITE_NAME
from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN
from invenio.ext.email import send_email
## Template of the deletion-notification mail sent by Send_Delete_Mail().
## Interpolated with a dict supplying 'site-name', 'report-number' and
## 'deleter' (the email address of the user who deleted the record).
CFG_MAIL_BODY = """
**This is an automated mail from %(site-name)s**
The following record was deleted from %(site-name)s:
Report number: %(report-number)s
It was deleted by %(deleter)s.
Please note that there may be a short delay before the record
disappears from its collection. It should be gone by tomorrow morning
at the latest.
Thankyou."""
def Send_Delete_Mail(parameters, curdir, form, user_info=None):
    """
    In the event of a record having been deleted, mail the submitter
    (and possibly the record "managers") to inform them about the
    record's deletion.

    @parameters:

        + edsrn: The name of the file in the current submission's
                 working directory, in which the record's report number
                 is stored.

        + record_managers: A comma-separated string of the email
                           addresses of the record's managers.  If given,
                           they will be (blind*) copied into the mail.
                           * At this time, they are only blind copied
                             because of send_email's behaviour of
                             blind copying everyone if "To" contains
                             multiple addresses.  Anyway, blind was
                             wanted . . .

    @return: empty string.

    @Exceptions raised: None.
    """
    ## Read the record's report number from the submission's working
    ## directory:
    report_number = Send_Delete_Mail_read_file(
        "%s/%s" % (curdir, parameters["edsrn"])).strip()
    ########
    ## Wash the "record_managers" parameter: keep only the addresses
    ## that pass the validity check.
    valid_managers = []
    try:
        for candidate in parameters["record_managers"].split(","):
            candidate = candidate.strip()
            if email_valid_p(candidate):
                valid_managers.append(candidate)
    except AttributeError:
        ## record_managers doesn't seem to be a string? Treat it as
        ## though it were empty:
        valid_managers = []
    record_managers = ",".join(valid_managers)
    ##
    ########
    ## Email address of the user who performed the deletion:
    user_email = user_info["email"]
    ## Build the recipient list.  What we would like is "To: user" plus
    ## "Bcc: managers", but send_email doesn't expose headers; it does
    ## however blind-copy everybody whenever "To" holds a comma-separated
    ## list, which gives the wanted effect.
    if record_managers == "" and user_email == "guest":
        ## Can't send mails to "guest" and there are no managers to send
        ## the mail to. Drop out quietly.
        return ""
    if record_managers == "":
        ## No managers: send it only to the user.
        email_recipients = user_email
    elif user_email == "guest":
        ## Can't send mails to "guest"! Send only to managers.
        email_recipients = record_managers
    else:
        email_recipients = "%s,%s" % (user_email, record_managers)
    mail_subj = "Document %s deleted from %s" \
                % (report_number, CFG_SITE_NAME)
    mail_body = CFG_MAIL_BODY % \
                { 'report-number' : report_number,
                  'deleter' : user_email,
                  'site-name' : CFG_SITE_NAME,
                }
    send_email(CFG_SITE_SUPPORT_EMAIL,
               email_recipients,
               mail_subj,
               mail_body,
               copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    ##
    return ""
def Send_Delete_Mail_read_file(filename):
    """Read a file from a path and return it as a string.

    @param filename: (string) - the full path to the file to be read.

    @return: (string) - the file's contents; the empty string when the
        file is unreadable or an error occurs while reading it.
    """
    file_contents = ""
    if os.access("%s" % filename, os.R_OK):
        try:
            ## Close the handle deterministically (the previous version
            ## never closed it on the success path):
            fobj = open("%s" % filename, "r")
            try:
                file_contents = fobj.read()
            finally:
                fobj.close()
        except IOError:
            ## There was a problem reading the file. Register the exception
            ## so that the admin is informed.
            err_msg = """Error in a WebSubmit function. An unexpected """ \
                      """error was encountered when trying to read from """ \
                      """the file [%s].""" % filename
            register_exception(prefix=err_msg)
    return file_contents
diff --git a/invenio/legacy/websubmit/functions/Shared_Functions.py b/invenio/legacy/websubmit/functions/Shared_Functions.py
index 3f87a21a1..ad6590fc9 100644
--- a/invenio/legacy/websubmit/functions/Shared_Functions.py
+++ b/invenio/legacy/websubmit/functions/Shared_Functions.py
@@ -1,268 +1,268 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Functions shared by websubmit_functions"""
__revision__ = "$Id$"
import os
import cgi
import glob
import sys
from logging import DEBUG
from invenio.config import \
CFG_PATH_CONVERT, \
CFG_SITE_LANG
from invenio.bibdocfile import decompose_file
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.websubmit_file_converter import convert_file, InvenioWebSubmitFileConverterError, get_missing_formats, get_file_converter_logger
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError
from invenio.dbquery import run_sql
from invenio.bibsched import server_pid
from invenio.base.i18n import gettext_set_language
from invenio.search_engine import get_record
from invenio.legacy.bibrecord import record_get_field_values, record_get_field_value
def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.

    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
        NOTE(review): this flag is not referenced anywhere in the body
        below - targets come exclusively from get_missing_formats(), so
        only missing formats are produced; confirm whether overwrite
        support was intended.
    @param debug: (bool) when True, print progress to stderr and switch
        the file-converter logger to DEBUG level for the duration of
        this call.
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    ## Remember the current logger level so it can be restored afterwards:
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        extension = extension.lower()
        if debug:
            print >> sys.stderr, "basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension)
        ## Every file in basedir sharing this basename is considered an
        ## already-existing format of the same document:
        filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
        if debug:
            print >> sys.stderr, "filelist: %s" % filelist
        missing_formats = get_missing_formats(filelist)
        if debug:
            print >> sys.stderr, "missing_formats: %s" % missing_formats
        for path, formats in missing_formats.iteritems():
            if debug:
                print >> sys.stderr, "... path: %s, formats: %s" % (path, formats)
            for aformat in formats:
                if debug:
                    print >> sys.stderr, "...... aformat: %s" % aformat
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print >> sys.stderr, "...... newpath: %s" % newpath
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError, msg:
                    if debug:
                        print >> sys.stderr, "...... Exception: %s" % msg
                    ## A failed conversion is reported to the admin but
                    ## does not abort the remaining conversions:
                    register_exception(alert_admin=True)
    finally:
        ## Always restore the logger level, even if conversion raised:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths
def createIcon(fullpath, iconsize):
    """Given a fullpath, this function extracts the file's extension and
    if the format is compatible it converts it to icon.

    @param fullpath: (string) complete path to file
    @param iconsize: geometry passed to the converter's -scale option
        (e.g. "180" or "96x96").

    Return the iconpath if successful otherwise None
    """
    basedir = os.path.dirname(fullpath)
    filename = os.path.basename(fullpath)
    filename, extension = os.path.splitext(filename)
    if extension == filename:
        ## (Bug fix: this line previously read "extension == ''" - a
        ## no-op comparison instead of the intended assignment.)
        extension = ""
    iconpath = "%s/icon-%s.gif" % (basedir, filename)
    if os.path.exists(fullpath) and extension.lower() in ['.pdf', '.gif', '.jpg', '.jpeg', '.ps']:
        ## WARNING: the command line is built by string interpolation and
        ## executed through a shell; paths containing shell metacharacters
        ## would be interpreted by it.  Paths normally come from the
        ## submission working directory, but quoting them (or switching
        ## to subprocess with an argument list) would be safer.
        os.system("%s -scale %s %s %s" % (CFG_PATH_CONVERT, iconsize, fullpath, iconpath))
    if os.path.exists(iconpath):
        return iconpath
    else:
        return None
def get_dictionary_from_string(dict_string):
    """Convert the string representation of a "dictionary" into an
    actual python dictionary.

    For example, given the following string:
     {'TITLE' : 'EX_TITLE', 'AUTHOR' : 'EX_AUTHOR', 'REPORTNUMBER' : 'EX_RN'}

    A dictionary in the following format will be returned:
     {
        'TITLE' : 'EX_TITLE',
        'AUTHOR' : 'EX_AUTHOR',
        'REPORTNUMBER' : 'EX_RN',
     }

    @param dict_string: (string) - the string version of the dictionary.

    @return: (dictionary) - the dictionary built from the string; an
        empty dictionary when the string cannot be evaluated or does
        not evaluate to a plain dict.
    """
    # Evaluate the string in empty local/global namespaces. An empty
    # '__builtins__' mapping is still supplied explicitly - otherwise
    # Python would silently insert the real one, giving the evaluated
    # string access to undesirable functions such as 'file()', 'open()',
    # 'exec()', etc.
    try:
        candidate = eval(dict_string, {"__builtins__": {}}, {})
    except:
        return {}
    # Accept only an exact dict: type() is used deliberately instead of
    # isinstance() so that even subclasses of dict are rejected.
    if type(candidate) is dict:
        return candidate
    return {}
def ParamFromFile(afile):
    """ Pipe a multi-line file into a single parameter.

    @param afile: (string) - path to the file to read; surrounding
        whitespace is stripped before use.
    @return: (string) - the complete file content, or the empty string if
        the path is empty or the file cannot be read.
    """
    afile = afile.strip()
    if afile == '':
        return ''
    try:
        ## Use a context manager so the handle is closed even if reading
        ## fails part-way (the original leaked the handle in that case).
        ## read() is equivalent to concatenating readlines().
        with open(afile, "r") as fp:
            return fp.read()
    except IOError:
        ## Best-effort: an unreadable file yields an empty parameter.
        return ''
def write_file(filename, filedata):
    """Open FILENAME and write FILEDATA to it.

    @param filename: (string) - path to write to (whitespace-stripped).
    @param filedata: (string) - the data to write.
    @raise InvenioWebSubmitFunctionError: if the file cannot be opened
        for writing.
    @return: (string) - always the empty string.
    """
    filename1 = filename.strip()
    try:
        of = open(filename1, 'w')
    except IOError:
        raise InvenioWebSubmitFunctionError('Cannot open ' + filename1 + ' to write')
    ## Close the handle even if the write itself fails (the original
    ## leaked it in that case).
    try:
        of.write(filedata)
    finally:
        of.close()
    return ""
def get_nice_bibsched_related_message(curdir, ln=CFG_SITE_LANG):
    """
    @param curdir: the submission working directory; a 'bibupload_id' file
        must exist there for any message to be produced.
    @param ln: language code used to localize the message.
    @return: a message suitable to display to the user, explaining the current
    status of the system; empty string when no BibUpload is scheduled.
    @rtype: string
    """
    bibupload_id = ParamFromFile(os.path.join(curdir, 'bibupload_id'))
    if not bibupload_id:
        ## No BibUpload scheduled? Then we don't care about bibsched
        return ""
    ## Let's get an estimate about how many processes are waiting in the queue.
    ## Our bibupload might be somewhere in it, but it's not really so important
    ## WRT informing the user.
    _ = gettext_set_language(ln)
    res = run_sql("SELECT id,proc,runtime,status,priority FROM schTASK WHERE (status='WAITING' AND runtime<=NOW()) OR status='SLEEPING'")
    ## BUGFIX: "as been inserted" -> "has been inserted" in the user-visible
    ## message. NOTE(review): this changes a gettext msgid, so translation
    ## catalogues (.po files) must be updated to match.
    pre = _("Note that your submission has been inserted into the bibliographic task queue and is waiting for execution.\n")
    if server_pid():
        ## BibSched is up and running
        msg = _("The task queue is currently running in automatic mode, and there are currently %s tasks waiting to be executed. Your record should be available within a few minutes and searchable within an hour or thereabouts.\n") % (len(res))
    else:
        msg = _("Because of a human intervention or a temporary problem, the task queue is currently set to the manual mode. Your submission is well registered but may take longer than usual before it is fully integrated and searchable.\n")
    return pre + msg
def txt2html(msg):
    """Transform newlines into paragraphs.

    Each line of MSG is HTML-escaped and wrapped in a <p> element.
    """
    escaped_lines = [cgi.escape(line) for line in msg.split('\n')]
    return "<p>" + "</p><p>".join(escaped_lines) + "</p>"
def get_all_values_in_curdir(curdir):
    """
    Return a dictionary with all the content of curdir.

    Hidden files (leading '.') and non-regular files are skipped; each
    value is the stripped content of the corresponding file.
    @param curdir: the path to the current directory.
    @type curdir: string
    @return: the content
    @rtype: dict
    """
    ret = {}
    for filename in os.listdir(curdir):
        filepath = os.path.join(curdir, filename)
        if not filename.startswith('.') and os.path.isfile(filepath):
            ## Close the handle deterministically (the original relied on
            ## garbage collection and leaked it).
            with open(filepath) as fh:
                ret[filename] = fh.read().strip()
    return ret
def get_current_record(curdir, system_number_file='SN'):
    """
    Return the current record (in case it's being modified).

    @param curdir: the path to the current directory.
    @type curdir: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @return: the record, or {} when no (or an empty) system number file
        exists in curdir.
    @rtype: as in L{get_record}
    """
    sn_path = os.path.join(curdir, system_number_file)
    if os.path.exists(sn_path):
        ## Close the handle deterministically (the original leaked it).
        with open(sn_path) as fh:
            recid = fh.read().strip()
        if recid:
            return get_record(int(recid))
    return {}
def retrieve_field_values(curdir, field_name, separator=None, system_number_file='SN', tag=None):
    """
    This is a handy function to retrieve values either from the current
    submission directory, when a form has been just submitted, or from
    an existing record (e.g. during MBI action).

    @param curdir: is the current submission directory.
    @type curdir: string
    @param field_name: is the form field name that might exists on disk.
    @type field_name: string
    @param separator: is an optional separator. If it exists, it will be used
        to retrieve multiple values contained in the field.
    @type separator: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @param tag: is the full MARC tag (tag+ind1+ind2+code) that should
        contain values. If not specified, only values in curdir will
        be retrieved.
    @type tag: 6-chars
    @return: the field value(s).
    @rtype: list of strings.
    @note: if field_name exists in curdir it will take precedence over
        retrieving the values from the record.
    """
    field_file = os.path.join(curdir, field_name)
    if os.path.exists(field_file):
        ## Close handles deterministically (the original leaked them).
        with open(field_file) as fh:
            field_value = fh.read()
        if separator is not None:
            return [value.strip() for value in field_value.split(separator) if value.strip()]
        return [field_value.strip()]
    elif tag is not None:
        ## Use a distinct local name instead of shadowing the
        ## system_number_file parameter with a full path.
        sn_path = os.path.join(curdir, system_number_file)
        if os.path.exists(sn_path):
            with open(sn_path) as fh:
                recid = int(fh.read().strip())
            record = get_record(recid)
            if separator:
                return record_get_field_values(record, tag[:3], tag[3], tag[4], tag[5])
            return [record_get_field_value(record, tag[:3], tag[3], tag[4], tag[5])]
    return []
diff --git a/invenio/legacy/websubmit/functions/Stamp_Replace_Single_File_Approval.py b/invenio/legacy/websubmit/functions/Stamp_Replace_Single_File_Approval.py
index ffef7f618..4a5089608 100644
--- a/invenio/legacy/websubmit/functions/Stamp_Replace_Single_File_Approval.py
+++ b/invenio/legacy/websubmit/functions/Stamp_Replace_Single_File_Approval.py
@@ -1,511 +1,511 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Stamp_Replace_Single_File_Approval: A function to allow a single file
that is already attached to a record to be stamped at approval time.
"""
__revision__ = "$Id$"
from invenio.bibdocfile import BibRecDocs, InvenioBibDocFileError
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio import websubmit_file_stamper
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionWarning, \
InvenioWebSubmitFunctionError, InvenioWebSubmitFileStamperError
import os.path
import re
import cgi
import time
def Stamp_Replace_Single_File_Approval(parameters, \
                                       curdir, \
                                       form, \
                                       user_info=None):
    """
    This function is intended to be called when a document has been
    approved and needs to be stamped.
    The function should be used when there is ONLY ONE file to be
    stamped after approval (for example, the "main file").
    The name of the file to be stamped should be known and should be stored
    in a file in the submission's working directory (without the extension).
    Generally, this will work out fine as the main file is named after the
    report number of the document, this will be stored in the report number
    file.

    @param parameters: (dictionary) - must contain:

         + latex_template: (string) - the name of the LaTeX template that
            should be used for the creation of the stamp.

         + latex_template_vars: (string) - a string-ified dictionary
            of variables to be replaced in the LaTeX template and the
            values (or names of files in curdir containing the values)
            with which to replace them. Use prefix 'FILE:' to specify
            that the stamped value must be read from a file in
            submission directory instead of being a fixed value to
            stamp.
            E.G.:
               { 'TITLE' : 'FILE:DEMOTHESIS_TITLE',
                 'DATE'  : 'FILE:DEMOTHESIS_DATE'
               }

         + file_to_be_stamped: (string) - this is the name of a file in the
            submission's working directory that contains the name of the
            bibdocfile that is to be stamped.

         + new_file_name: (string) - this is the name of a file in the
            submission's working directory that contains the name that is to
            be given to the file after it has been stamped. If empty, or if
            that file doesn't exist, the file will not be renamed after
            stamping.

         + switch_file: (string) - when this value is set, specifies
            the name of a file that will swith on/off the
            stamping. The stamp will be applied if the file exists in
            the submission directory and is not empty. If the file
            cannot be found or is empty, the stamp is not applied.
            Useful for eg. if you want to let your users control the
            stamping with a checkbox on your submission page.
            Leave this parameter empty to always stamp by default.

         + stamp: (string) - the type of stamp to be applied to the file.
            should be one of:
              + first (only the first page is stamped);
              + all (all pages are stamped);
              + coverpage (a separate cover-page is added to the file as a
                 first page);

         + layer: (string) - the position of the stamp. Should be one of:
              + background (invisible if original file has a white
                 -not transparent- background layer)
              + foreground (on top of the stamped file. If the stamp
                 does not have a transparent background, will hide all
                 of the document layers)
           The default value is 'background'.
    """
    ############
    ## Definition of important variables:
    ############
    ## The file stamper needs to be called with a dictionary of options of
    ## the following format:
    ##  { 'latex-template'      : "",  ## TEMPLATE_NAME
    ##    'latex-template-var'  : {},  ## TEMPLATE VARIABLES
    ##    'input-file'          : "",  ## INPUT FILE
    ##    'output-file'         : "",  ## OUTPUT FILE
    ##    'stamp'               : "",  ## STAMP TYPE
    ##    'layer'               : "",  ## LAYER TO STAMP
    ##    'verbosity'           : 0,   ## VERBOSITY (we don't care about it)
    ##  }
    file_stamper_options = { 'latex-template'      : "",
                             'latex-template-var'  : { },
                             'input-file'          : "",
                             'output-file'         : "",
                             'stamp'               : "",
                             'layer'               : "",
                             'verbosity'           : 0,
                           }
    ## Check if stamping is enabled
    switch_file = parameters.get('switch_file', '')
    if switch_file:
        # Good, a "switch file" was specified. Check if it exists, and
        # it its value is not empty.
        if not _read_in_file(os.path.join(curdir, switch_file)):
            # File does not exist, or is emtpy. Silently abort
            # stamping.
            return ""
    ## Submission access number:
    access = _read_in_file("%s/access" % curdir)
    ## record ID for the current submission. It is found in the special file
    ## "SN" (sysno) in curdir:
    recid = _read_in_file("%s/SN" % curdir)
    try:
        recid = int(recid)
    except ValueError:
        ## No record ID. Cannot continue.
        err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                  "Cannot recover record ID from the submission's working " \
                  "directory. Stamping cannot be carried out. The " \
                  "submission ID is [%s]." % cgi.escape(access)
        register_exception(prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)
    ############
    ## Resolution of function parameters:
    ############
    ## The name of the LaTeX template to be used for stamp creation:
    latex_template = "%s" % ((type(parameters['latex_template']) is str \
                              and parameters['latex_template']) or "")
    ## A string containing the variables/values that should be substituted
    ## in the final (working) LaTeX template:
    latex_template_vars_string = "%s" % \
                       ((type(parameters['latex_template_vars']) is str \
                         and parameters['latex_template_vars']) or "")
    ## The type of stamp to be applied to the file(s):
    stamp = "%s" % ((type(parameters['stamp']) is str and \
                     parameters['stamp'].lower()) or "")
    ## The layer to use for stamping:
    try:
        layer = parameters['layer']
    except KeyError:
        layer = "background"
    if not layer in ('background', 'foreground'):
        layer = "background"
    ## Get the name of the file to be stamped from the file indicated in
    ## the file_to_be_stamped parameter:
    try:
        file_to_stamp_file = parameters['file_to_be_stamped']
    except KeyError:
        file_to_stamp_file = ""
    else:
        if file_to_stamp_file is None:
            file_to_stamp_file = ""
    ## Get the "basename" for the file to be stamped (it's mandatory that it
    ## be in curdir):
    file_to_stamp_file = os.path.basename(file_to_stamp_file).strip()
    name_file_to_stamp = _read_in_file("%s/%s" % (curdir, file_to_stamp_file))
    ## BUGFIX: str.replace returns a NEW string; the original discarded the
    ## result, so newlines/carriage-returns were never actually removed.
    name_file_to_stamp = name_file_to_stamp.replace("\n", "").replace("\r", "")
    ##
    ## Get the name to be given to the file after it has been stamped (if there
    ## is one.) Once more, it will be found in a file in curdir:
    try:
        new_file_name_file = parameters['new_file_name']
    except KeyError:
        new_file_name_file = ""
    else:
        if new_file_name_file is None:
            new_file_name_file = ""
    ## Get the "basename" for the file containing the new file name. (It's
    ## mandatory that it be in curdir):
    new_file_name_file = os.path.basename(new_file_name_file).strip()
    new_file_name = _read_in_file("%s/%s" % (curdir, new_file_name_file))
    ############
    ## Begin:
    ############
    ##
    ## If no name for the file to stamp, warning.
    if name_file_to_stamp == "":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a valid name for the " \
                  "file to be stamped. Stamping could not, therefore, be " \
                  "carried out. The submission ID is [%s]." \
                  % access
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ##
    ## The file to be stamped is a bibdoc. We will only stamp it (a) if it
    ## exists; and (b) if it is a PDF file. So, get the path (in the bibdocs
    ## tree) to the file to be stamped:
    ##
    ## First get the object representing the bibdocs belonging to this record:
    bibrecdocs = BibRecDocs(recid)
    try:
        bibdoc_file_to_stamp = bibrecdocs.get_bibdoc("%s" % name_file_to_stamp)
    except InvenioBibDocFileError:
        ## Couldn't get a bibdoc object for this filename. Probably the file
        ## that we wanted to stamp wasn't attached to this record.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a bibdoc object for the " \
                  "filename [%s] when trying to stamp the main file. " \
                  "Stamping could not be carried out. The submission ID is " \
                  "[%s] and the record ID is [%s]." \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ## Get the BibDocFile object for the PDF version of the bibdoc to be
    ## stamped:
    try:
        bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("pdf")
    except InvenioBibDocFileError:
        ## This bibdoc doesn't have a physical file with the extension ".pdf"
        ## (take note of the lower-case extension - the bibdocfile library
        ## is case-sensitive with respect to filenames.  Log that there was
        ## no "pdf" and check for a file with extension "PDF":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It wasn't possible to recover a PDF BibDocFile object " \
                  "for the file with the name [%s], using the extension " \
                  "[pdf] - note the lower case - the bibdocfile library " \
                  "relies upon the case of an extension. The submission ID " \
                  "is [%s] and the record ID is [%s]. Going to try " \
                  "looking for a file with a [PDF] extension before giving " \
                  "up . . . " \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        try:
            bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("PDF")
        except InvenioBibDocFileError:
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                      "It wasn't possible to recover a PDF " \
                      "BibDocFile object for the file with the name [%s], " \
                      "using the extension [PDF] - note the upper case. " \
                      "Had previously tried searching for [pdf] - now " \
                      "giving up. Stamping could not be carried out. " \
                      "The submission ID is [%s] and the record ID is [%s]." \
                      % (name_file_to_stamp, access, recid)
            register_exception(prefix=wrn_msg)
            raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ############
    ## Go ahead and prepare the details for the LaTeX stamp template and its
    ## variables:
    ############
    ## Strip the LaTeX filename into the basename (All templates should be
    ## in the template repository):
    latex_template = os.path.basename(latex_template)
    ## Convert the string of latex template variables into a dictionary
    ## of search-term/replacement-term pairs:
    latex_template_vars = get_dictionary_from_string(latex_template_vars_string)
    ## For each of the latex variables, check in `CURDIR' for a file with that
    ## name. If found, use it's contents as the template-variable's value.
    ## If not, just use the raw value string already held by the template
    ## variable:
    latex_template_varnames = latex_template_vars.keys()
    for varname in latex_template_varnames:
        ## Get this variable's value:
        varvalue = latex_template_vars[varname].strip()
        if not ((varvalue.find("date(") == 0 and varvalue[-1] == ")") or \
                (varvalue.find("include(") == 0 and varvalue[-1] == ")")) \
                and varvalue != "":
            ## We don't want to interfere with date() or include() directives,
            ## so we only do this if the variable value didn't contain them:
            ##
            ## Is this variable value the name of a file in the current
            ## submission's working directory, from which a literal value for
            ## use in the template should be extracted? If yes, it will
            ## begin with "FILE:". If no, we leave the value exactly as it is.
            if varvalue.upper().find("FILE:") == 0:
                ## The value to be used is to be taken from a file. Clean the
                ## file name and if it's OK, extract that value from the file.
                ##
                seekvalue_fname = varvalue[5:].strip()
                seekvalue_fname = os.path.basename(seekvalue_fname).strip()
                if seekvalue_fname != "":
                    ## Attempt to extract the value from the file:
                    if os.access("%s/%s" % (curdir, seekvalue_fname), \
                                 os.R_OK|os.F_OK):
                        ## The file exists. Extract its value:
                        try:
                            repl_file_val = \
                              open("%s/%s" \
                                   % (curdir, seekvalue_fname), "r").readlines()
                        except IOError:
                            ## The file was unreadable.
                            err_msg = "Error in Stamp_Replace_Single_File_" \
                                      "Approval: The function attempted to " \
                                      "read a LaTex template variable " \
                                      "value from the following file in the " \
                                      "current submission's working " \
                                      "directory: [%s]. However, an " \
                                      "unexpected error was encountered " \
                                      "when doing so. Please inform the " \
                                      "administrator." \
                                      % seekvalue_fname
                            register_exception(req=user_info['req'])
                            raise InvenioWebSubmitFunctionError(err_msg)
                        else:
                            final_varval = ""
                            for line in repl_file_val:
                                final_varval += line
                            final_varval = final_varval.rstrip()
                            ## Replace the variable value with that which has
                            ## been read from the file:
                            latex_template_vars[varname] = final_varval
                    else:
                        ## The file didn't actually exist in the current
                        ## submission's working directory. Use an empty
                        ## value:
                        latex_template_vars[varname] = ""
                else:
                    ## The filename was not valid.
                    err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                              "The function was configured to read a LaTeX " \
                              "template variable from a file with the " \
                              "following instruction: [%s --> %s]. The " \
                              "filename, however, was not considered valid. " \
                              "Please report this to the administrator." \
                              % (varname, varvalue)
                    raise InvenioWebSubmitFunctionError(err_msg)
    ## Put the 'fixed' values into the file_stamper_options dictionary:
    file_stamper_options['latex-template'] = latex_template
    file_stamper_options['latex-template-var'] = latex_template_vars
    file_stamper_options['stamp'] = stamp
    file_stamper_options['layer'] = layer
    ## Put the input file and output file into the file_stamper_options
    ## dictionary:
    file_stamper_options['input-file'] = bibdocfile_file_to_stamp.fullpath
    file_stamper_options['output-file'] = bibdocfile_file_to_stamp.get_full_name()
    ##
    ## Before attempting to stamp the file, log the dictionary of arguments
    ## that will be passed to websubmit_file_stamper:
    try:
        fh_log = open("%s/websubmit_file_stamper-calls-options.log" \
                      % curdir, "a+")
        fh_log.write("%s\n" % file_stamper_options)
        fh_log.flush()
        fh_log.close()
    except IOError:
        ## Unable to log the file stamper options.
        exception_prefix = "Unable to write websubmit_file_stamper " \
                           "options to log file " \
                           "%s/websubmit_file_stamper-calls-options.log" \
                           % curdir
        register_exception(prefix=exception_prefix)
    try:
        ## Try to stamp the file:
        (stamped_file_path_only, stamped_file_name) = \
                websubmit_file_stamper.stamp_file(file_stamper_options)
    except InvenioWebSubmitFileStamperError:
        ## It wasn't possible to stamp this file.
        ## Register the exception along with an informational message:
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "There was a problem stamping the file with the name [%s] " \
                  "and the fullpath [%s]. The file has not been stamped. " \
                  "The submission ID is [%s] and the record ID is [%s]." \
                  % (name_file_to_stamp, \
                     file_stamper_options['input-file'], \
                     access, \
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    else:
        ## Stamping was successful. The BibDocFile must now be revised with
        ## the latest (stamped) version of the file:
        file_comment = "Stamped by WebSubmit: %s" \
                       % time.strftime("%d/%m/%Y", time.localtime())
        try:
            dummy = \
                  bibrecdocs.add_new_version("%s/%s" \
                                             % (stamped_file_path_only, \
                                                stamped_file_name), \
                                             name_file_to_stamp, \
                                             comment=file_comment, \
                                             flags=('STAMPED', ))
        except InvenioBibDocFileError:
            ## Unable to revise the file with the newly stamped version.
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                      "After having stamped the file with the name [%s] " \
                      "and the fullpath [%s], it wasn't possible to revise " \
                      "that file with the newly stamped version. Stamping " \
                      "was unsuccessful. The submission ID is [%s] and the " \
                      "record ID is [%s]." \
                      % (name_file_to_stamp, \
                         file_stamper_options['input-file'], \
                         access, \
                         recid)
            register_exception(prefix=wrn_msg)
            raise InvenioWebSubmitFunctionWarning(wrn_msg)
        else:
            ## File revised. If the file should be renamed after stamping,
            ## do so.
            if new_file_name != "":
                try:
                    bibrecdocs.change_name(newname = new_file_name, docid = bibdoc_file_to_stamp.id)
                except (IOError, InvenioBibDocFileError):
                    ## Unable to change the name
                    wrn_msg = "Warning in Stamp_Replace_Single_File_Approval" \
                              ": After having stamped and revised the file " \
                              "with the name [%s] and the fullpath [%s], it " \
                              "wasn't possible to rename it to [%s]. The " \
                              "submission ID is [%s] and the record ID is " \
                              "[%s]." \
                              % (name_file_to_stamp, \
                                 file_stamper_options['input-file'], \
                                 new_file_name, \
                                 access, \
                                 recid)
                    ## BUGFIX: the original built wrn_msg and then silently
                    ## discarded it. Log it like the other failure paths
                    ## (renaming failure is non-fatal, so do not raise).
                    register_exception(prefix=wrn_msg)
    ## Finished.
    return ""
def get_dictionary_from_string(dict_string):
    """Given a string version of a "dictionary", split the string into a
    python dictionary.
    For example, given the following string:
    {'TITLE' : 'EX_TITLE', 'AUTHOR' : 'EX_AUTHOR', 'REPORTNUMBER' : 'EX_RN'}
    A dictionary in the following format will be returned:
    {
       'TITLE'        : 'EX_TITLE',
       'AUTHOR'       : 'EX_AUTHOR',
       'REPORTNUMBER' : 'EX_RN',
    }
    @param dict_string: (string) - the string version of the dictionary.
    @return: (dictionary) - the dictionary built from the string.
    """
    ## Drop leading/trailing whitespace and the surrounding braces:
    stripped = dict_string.strip(" {}")
    ## Split the remainder on commas that are NOT escaped with a backslash.
    ## E.g. """'hello' : 'world', 'click' : 'here'""" becomes
    ## ["'hello' : 'world'", " 'click' : 'here'"], while an escaped comma
    ## as in """'hello\, world' : '!'""" stays inside its pair.
    raw_pairs = re.split(r'(?<!\\),', stripped)
    parsed = {}
    for raw_pair in raw_pairs:
        ## Each entry should look like "Key" : "Value" (quotes optional).
        ## Split on the FIRST colon only, so values may contain colons:
        pieces = raw_pair.split(":", 1)
        if len(pieces) != 2:
            ## Malformed entry (no colon) - discard it.
            continue
        ## Strip whitespace, single-quotes and double-quotes from both
        ## sides of the pair:
        key = pieces[0].strip(" '\"")
        value = pieces[1].strip(" '\"")
        ## Entries with an empty key are discarded:
        if key:
            parsed[key] = value
    return parsed
def _read_in_file(filepath):
"""Read the contents of a file into a string in memory.
@param filepath: (string) - the path to the file to be read in.
@return: (string) - the contents of the file.
"""
if filepath != "" and \
os.path.exists("%s" % filepath):
try:
fh_filepath = open("%s" % filepath, "r")
file_contents = fh_filepath.read()
fh_filepath.close()
except IOError:
register_exception()
file_contents = ""
else:
file_contents = ""
return file_contents
diff --git a/invenio/legacy/websubmit/functions/Stamp_Uploaded_Files.py b/invenio/legacy/websubmit/functions/Stamp_Uploaded_Files.py
index 3404c1a21..e5bc15365 100644
--- a/invenio/legacy/websubmit/functions/Stamp_Uploaded_Files.py
+++ b/invenio/legacy/websubmit/functions/Stamp_Uploaded_Files.py
@@ -1,521 +1,521 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Stamp_Uploaded_Files: A WebSubmit Function whose job is to stamp given
files that were uploaded during a submission.
"""
__revision__ = "$Id$"
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio import websubmit_file_stamper
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionWarning, \
InvenioWebSubmitFunctionError, InvenioWebSubmitFileStamperError
import os.path, shutil, re
def Stamp_Uploaded_Files(parameters, curdir, form, user_info=None):
"""
Stamp certain files that have been uploaded during a submission.
@param parameters: (dictionary) - must contain:
+ latex_template: (string) - the name of the LaTeX template that
should be used for the creation of the stamp.
+ latex_template_vars: (string) - a string-ified dictionary
of variables to be replaced in the LaTeX template and the
values (or names of files in curdir containing the values)
with which to replace them. Use prefix 'FILE:' to specify
that the stamped value must be read from a file in
submission directory instead of being a fixed value to
stamp.
E.G.:
{ 'TITLE' : 'FILE:DEMOTHESIS_TITLE',
'DATE' : 'FILE:DEMOTHESIS_DATE'
}
+ files_to_be_stamped: (string) - The directories in which files
should be stamped: This is a comma-separated list of directory
names. E.g.:
DEMOTHESIS_MAIN,DEMOTHESIS_ADDITIONAL
(If you use Create_Upload_Files_Interface function, you
should know that uploaded files goes under a subdirectory
'updated/' of the /files/ folder in submission directory:
in this case you have to specify this component in the
parameter. For eg:
updated/DEMOTHESIS_MAIN,updated/DEMOTHESIS_ADDITIONAL)
+ stamp: (string) - the type of stamp to be applied to the files.
should be one of:
+ first (only the first page is stamped);
+ all (all pages are stamped);
+ coverpage (a separate cover-page is added to the file as a
first page);
+ layer: (string) - the position of the stamp. Should be one of:
+ background (invisible if original file has a white -
not transparent- background layer)
+ foreground (on top of the stamped file. If the stamp
does not have a transparent background,
will hide all of the document layers)
+ switch_file: (string) - when this value is set, specifies
the name of a file that will swith on/off the
stamping. The 'switch_file' must contain the names defined
in 'files_to_be_stamped' (comma-separated values). Stamp
will be applied only to files referenced in the
switch_file. No stamp is applied if the switch_file is
missing from the submission directory.
However if the no switch_file is defined in this variable
(parameter is left empty), stamps are applied according
the variable 'files_to_be_stamped'.
Useful for eg. if you want to let your users control the
stamping with a checkbox on your submission page.
If all goes according to plan, for each directory in which files are to
be stamped, the original, unstamped files should be found in a
directory 'files_before_stamping/DIRNAME', and the stamped versions
should be found under 'files/DIRNAME'. E.g., for DEMOTHESIS_Main:
- Unstamped: files_before_stamping/DEMOTHESIS_Main
- Stamped: files/DEMOTHESIS_Main
"""
## The file stamper needs to be called with a dictionary of options of
## the following format:
## { 'latex-template' : "", ## TEMPLATE_NAME
## 'latex-template-var' : {}, ## TEMPLATE VARIABLES
## 'input-file' : "", ## INPUT FILE
## 'output-file' : "", ## OUTPUT FILE
## 'stamp' : "", ## STAMP TYPE
## 'layer' : "", ## LAYER TO STAMP
## 'verbosity' : 0, ## VERBOSITY (we don't care about it)
## }
file_stamper_options = { 'latex-template' : "",
'latex-template-var' : { },
'input-file' : "",
'output-file' : "",
'stamp' : "",
'layer' : "",
'verbosity' : 0,
}
## A dictionary of arguments to be passed to visit_for_stamping:
visit_for_stamping_arguments = { 'curdir' : curdir,
'file_stamper_options' : \
file_stamper_options,
'user_info' : user_info
}
## Start by getting the parameter-values from WebSubmit:
## The name of the LaTeX template to be used for stamp creation:
latex_template = "%s" % ((type(parameters['latex_template']) is str \
and parameters['latex_template']) or "")
## A string containing the variables/values that should be substituted
## in the final (working) LaTeX template:
latex_template_vars_string = "%s" % \
((type(parameters['latex_template_vars']) is str \
and parameters['latex_template_vars']) or "")
## The type of stamp to be applied to the file(s):
stamp = "%s" % ((type(parameters['stamp']) is str and \
parameters['stamp'].lower()) or "")
## The layer to use for stamping:
try:
layer = parameters['layer']
except KeyError:
layer = "background"
if not layer in ('background', 'foreground'):
layer = "background"
## The directories in which files should be stamped:
## This is a comma-separated list of directory names. E.g.:
## DEMOTHESIS_MAIN,DEMOTHESIS_ADDITIONAL
stamp_content_of = "%s" % ((type(parameters['files_to_be_stamped']) \
is str and parameters['files_to_be_stamped']) \
or "")
## Now split the list (of directories in which to stamp files) on commas:
if stamp_content_of.strip() != "":
stamping_locations = stamp_content_of.split(",")
else:
stamping_locations = []
## Check if stamping is enabled
switch_file = parameters.get('switch_file', '')
if switch_file:
# Good, a "switch file" was specified. Check if it exists, and
# it its value is not empty.
switch_file_content = ''
try:
fd = file(os.path.join(curdir, switch_file))
switch_file_content = fd.read().split(',')
fd.close()
except:
switch_file_content = ''
if not switch_file_content:
# File does not exist, or is emtpy. Silently abort
# stamping.
return ""
else:
stamping_locations = [location for location in stamping_locations \
if location in switch_file_content]
if len(stamping_locations) == 0:
## If there are no items to be stamped, don't continue:
return ""
## Strip the LaTeX filename into the basename (All templates should be
## in the template repository):
latex_template = os.path.basename(latex_template)
## Convert the string of latex template variables into a dictionary
## of search-term/replacement-term pairs:
latex_template_vars = get_dictionary_from_string(latex_template_vars_string)
## For each of the latex variables, check in `CURDIR' for a file with that
## name. If found, use it's contents as the template-variable's value.
## If not, just use the raw value string already held by the template
## variable:
latex_template_varnames = latex_template_vars.keys()
for varname in latex_template_varnames:
## Get this variable's value:
varvalue = latex_template_vars[varname].strip()
if not ((varvalue.find("date(") == 0 and varvalue[-1] == ")") or \
(varvalue.find("include(") == 0 and varvalue[-1] == ")")) \
and varvalue != "":
## We don't want to interfere with date() or include() directives,
## so we only do this if the variable value didn't contain them:
##
## Is this variable value the name of a file in the current
## submission's working directory, from which a literal value for
## use in the template should be extracted? If yes, it will
## begin with "FILE:". If no, we leave the value exactly as it is.
if varvalue.upper().find("FILE:") == 0:
## The value to be used is to be taken from a file. Clean the
## file name and if it's OK, extract that value from the file.
##
seekvalue_fname = varvalue[5:].strip()
seekvalue_fname = os.path.basename(seekvalue_fname).strip()
if seekvalue_fname != "":
## Attempt to extract the value from the file:
if os.access("%s/%s" % (curdir, seekvalue_fname), \
os.R_OK|os.F_OK):
## The file exists. Extract its value:
try:
repl_file_val = \
open("%s/%s" \
% (curdir, seekvalue_fname), "r").readlines()
except IOError:
## The file was unreadable.
err_msg = "Error in Stamp_Uploaded_Files: The " \
"function attempted to read a LaTex " \
"template variable value from the " \
"following file in the current " \
"submission's working directory: " \
"[%s]. However, an unexpected error " \
"was encountered when doing so. " \
"Please inform the administrator." \
% seekvalue_fname
register_exception(req=user_info['req'])
raise InvenioWebSubmitFunctionError(err_msg)
else:
final_varval = ""
for line in repl_file_val:
final_varval += line
final_varval = final_varval.rstrip()
## Replace the variable value with that which has
## been read from the file:
latex_template_vars[varname] = final_varval
else:
## The file didn't actually exist in the current
## submission's working directory. Use an empty
## value:
latex_template_vars[varname] = ""
else:
## The filename was not valid.
err_msg = "Error in Stamp_Uploaded_Files: The function " \
"was configured to read a LaTeX template " \
"variable from a file with the following " \
"instruction: [%s --> %s]. The filename, " \
"however, was not considered valid. Please " \
"report this to the administrator." \
% (varname, varvalue)
raise InvenioWebSubmitFunctionError(err_msg)
## Put the 'fixed' values into the file_stamper_options dictionary:
file_stamper_options['latex-template'] = latex_template
file_stamper_options['latex-template-var'] = latex_template_vars
file_stamper_options['stamp'] = stamp
file_stamper_options['layer'] = layer
for stampdir in stamping_locations:
## Create the full path to the stamp directory - it is considered
## to be under 'curdir' - the working directory for the current
## submission:
path_to_stampdir = "%s/files/%s" % (curdir, stampdir.strip())
## Call os.path.walk, passing it the path to the directory to be
## walked, the visit_for_stamping function (which will call the
## file-stamper for each file within that directory), and the
## dictionary of options to be passed to the file-stamper:
try:
os.path.walk(path_to_stampdir, \
visit_for_stamping, \
visit_for_stamping_arguments)
except InvenioWebSubmitFunctionWarning:
## Unable to stamp the files in stampdir. Register the exception
## and continue to try to stamp the files in the other stampdirs:
## FIXME - The original exception was registered in 'visit'.
## Perhaps we should just send the message contained in this
## warning to the admin?
register_exception(req=user_info['req'])
continue
except InvenioWebSubmitFunctionError, err:
## Unexpected error in stamping. The admin should be contacted
## because it has resulted in an unstable situation with the
## files. They are no longer in a well-defined state - some may
## have been lost and manual intervention by the admin is needed.
## FIXME - should this be reported here, or since we propagate it
## up to websubmit_engine anyway, should we let it register it?
register_exception(req=user_info['req'])
raise err
return ""
def visit_for_stamping(visit_for_stamping_arguments, dirname, filenames):
"""Visitor function called by os.path.walk.
This function takes a directory and a list of files in that directory
and for each file, calls the websubmit_file_stamper on it.
When a file is stamped, the original is moved away into a directory
of unstamped files and the new, stamped version is moved into its
place.
@param visit_for_stamping_arguments: (dictionary) of arguments needed
by this function. Must contain 'curdir', 'user_info' and
'file_stamper_options' members.
@param dirname: (string) - the path to the directory in which the
files are to be stamped.
@param filenames: (list) - the names of each file in dirname. An
attempt will be made to stamp each of these files.
@Exceptions Raised:
+ InvenioWebSubmitFunctionWarning;
+ InvenioWebSubmitFunctionError;
"""
## Get the dictionary of options to pass to the stamper:
file_stamper_options = visit_for_stamping_arguments['file_stamper_options']
## Create a directory to store original files before stamping:
dirname_files_pre_stamping = dirname.replace("/files/", \
"/files_before_stamping/", 1)
if not os.path.exists(dirname_files_pre_stamping):
try:
os.makedirs(dirname_files_pre_stamping)
except OSError, err:
## Unable to make a directory in which to store the unstamped
## files.
## Register the exception:
exception_prefix = "Unable to stamp files in [%s]. Couldn't " \
"create directory in which to store the " \
"original, unstamped files." \
% dirname
register_exception(prefix=exception_prefix)
## Since we can't make a directory for the unstamped files,
## we can't continue to stamp them.
## Skip the stamping of the contents of this directory by raising
## a warning:
msg = "Warning: A problem occurred when stamping files in [%s]. " \
"Unable to create directory to store the original, " \
"unstamped files. Got this error: [%s]. This means the " \
"files in this directory were not stamped." \
% (dirname, str(err))
raise InvenioWebSubmitFunctionWarning(msg)
## Loop through each file in the directory and attempt to stamp it:
for file_to_stamp in filenames:
## Get the path to the file to be stamped and put it into the
## dictionary of options that will be passed to stamp_file:
path_to_subject_file = "%s/%s" % (dirname, file_to_stamp)
file_stamper_options['input-file'] = path_to_subject_file
## Just before attempting to stamp the file, log the dictionary of
## options (file_stamper_options) that will be passed to websubmit-
## file-stamper:
try:
fh_log = open("%s/websubmit_file_stamper-calls-options.log" \
% visit_for_stamping_arguments['curdir'], "a+")
fh_log.write("%s\n" % file_stamper_options)
fh_log.flush()
fh_log.close()
except IOError:
## Unable to log the file stamper options.
exception_prefix = "Unable to write websubmit_file_stamper " \
"options to log file " \
"%s/websubmit_file_stamper-calls-options.log" \
% visit_for_stamping_arguments['curdir']
register_exception(prefix=exception_prefix)
try:
## Try to stamp the file:
(stamped_file_path_only, stamped_file_name) = \
websubmit_file_stamper.stamp_file(file_stamper_options)
except InvenioWebSubmitFileStamperError:
## It wasn't possible to stamp this file.
## Register the exception along with an informational message:
exception_prefix = "A problem occurred when stamping [%s]. The " \
"stamping of this file was unsuccessful." \
% path_to_subject_file
register_exception(prefix=exception_prefix)
## Skip this file, moving on to the next:
continue
else:
## Stamping was successful.
path_to_stamped_file = "%s/%s" % (stamped_file_path_only, \
stamped_file_name)
## Move the unstamped file from the "files" directory into the
## "files_before_stamping" directory:
try:
shutil.move(path_to_subject_file, "%s/%s" \
% (dirname_files_pre_stamping, file_to_stamp))
except IOError:
## Couldn't move the original file away from the "files"
## directory. Log the problem and continue on to the next
## file:
exception_prefix = "A problem occurred when stamping [%s]. " \
"The file was sucessfully stamped, and " \
"can be found here: [%s]. Unfortunately " \
"though, it could not be copied back to " \
"the current submission's working " \
"directory because the unstamped version " \
"could not be moved out of the way (tried " \
"to move it from here [%s] to here: " \
"[%s/%s]). The stamping of this file was " \
"unsuccessful." \
% (path_to_subject_file, \
path_to_stamped_file, \
path_to_subject_file, \
dirname_files_pre_stamping, \
file_to_stamp)
register_exception(prefix=exception_prefix)
continue
else:
## The original file has been moved into the files before
## stamping directory. Now try to copy the stamped file into
## the files directory:
try:
shutil.copy(path_to_stamped_file, "%s/%s" \
% (dirname, file_to_stamp))
except IOError:
## Even though the original, unstamped file was moved away
## from the files directory, the stamped-version couldn't
## be moved into its place. Register the exception:
exception_prefix = "A problem occurred when stamping " \
"[%s]. The file was sucessfully " \
"stamped, and can be found here: " \
"[%s]. Unfortunately though, it " \
"could not be copied back to the " \
"current submission's working " \
"directory." % (path_to_subject_file, \
path_to_stamped_file)
register_exception(prefix=exception_prefix)
## Because it wasn't possible to move the stamped file
## into the files directory, attempt to move the original,
## unstamped file back into the files directory:
try:
shutil.move("%s/%s" % (dirname_files_pre_stamping, \
file_to_stamp), \
path_to_stamped_file)
except IOError, err:
## It wasn't possible even to move the original file
## back to the files directory. Register the
## exception and stop the stamping process - it isn't
## safe to continue:
exeption_prefix = "A problem occurred when stamping " \
"[%s]. The file was sucessfully " \
"stamped, and can be found here: " \
"[%s]. Unfortunately though, it " \
"could not be copied back to the " \
"current submission's working " \
"directory. Additionionally, the " \
"original, unstamped file " \
"could not be moved back to the " \
"files directory, from the files-" \
"before-stamping directory. It " \
"can now be found here: [%s/%s]. " \
"Stamping cannot continue and " \
"manual intervention is necessary " \
"because the file cannot be " \
"attached to the record." \
% (path_to_subject_file, \
path_to_stamped_file, \
dirname_files_pre_stamping, \
file_to_stamp)
register_exception(prefix=exeption_prefix)
## Raise an InvenioWebSubmitFunctionError, stopping
## further stamping, etc:
raise InvenioWebSubmitFunctionError(exception_prefix)
def get_dictionary_from_string(dict_string):
    """Given a string version of a "dictionary", split the string into a
    python dictionary.
    For example, given the following string:
    {'TITLE' : 'EX_TITLE', 'AUTHOR' : 'EX_AUTHOR', 'REPORTNUMBER' : 'EX_RN'}
    A dictionary in the following format will be returned:
    {
       'TITLE'        : 'EX_TITLE',
       'AUTHOR'       : 'EX_AUTHOR',
       'REPORTNUMBER' : 'EX_RN',
    }
    @param dict_string: (string) - the string version of the dictionary.
    @return: (dictionary) - the dictionary built from the string.
    """
    ## Drop surrounding whitespace and the enclosing braces:
    body = dict_string.strip(" {}")
    ## Split the body on every comma that is NOT preceded by a backslash,
    ## so that escaped commas (e.g. 'hello\, world') survive inside a term.
    ## E.g. """'hello' : 'world', 'click' : 'here'""" becomes
    ## ["'hello' : 'world'", " 'click' : 'here'"].
    raw_pairs = re.split(r'(?<!\\),', body)
    parsed = {}
    for raw_pair in raw_pairs:
        ## Each term should look like "Key" : "Value" (quotes optional).
        ## Split on the FIRST colon only, so values may contain colons:
        pieces = raw_pair.split(":", 1)
        if len(pieces) != 2:
            ## Malformed term (no key/value separator) - discard it:
            continue
        ## Remove surrounding whitespace and single/double quotes from
        ## both sides of the pair:
        key = pieces[0].strip(" '\"")
        if not key:
            ## An empty key is not usable - discard the pair:
            continue
        parsed[key] = pieces[1].strip(" '\"")
    return parsed
diff --git a/invenio/legacy/websubmit/functions/Video_Processing.py b/invenio/legacy/websubmit/functions/Video_Processing.py
index 322c97fce..02a7c9ae6 100644
--- a/invenio/legacy/websubmit/functions/Video_Processing.py
+++ b/invenio/legacy/websubmit/functions/Video_Processing.py
@@ -1,114 +1,114 @@
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebSubmit function - Video processing.
"""
__revision__ = "$Id$"
import os
from invenio.utils.json import json_decode_file
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.modules.encoder.config import CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION
from invenio.modules.encoder.utils import generate_timestamp
from invenio.modules.encoder.batch_engine import create_job_from_dictionary
from invenio.config import CFG_SITE_ADMIN_EMAIL
def Video_Processing(parameters, curdir, form, user_info=None):
    """
    Perform all the required processing of the video.

    Parameters are:
    * "batch_template": to specify the absolute path to a
      configuration that describes which manipulations the uploaded file
      should receive. If empty, will use by default
      etc/bibencode/batch_template_submission.json
    * "aspect": to specify in which form element the aspect will be available
    * "title": to specify in which form element the title will be available
    """
    ## Read the batch template for submissions
    if parameters.get('batch_template'):
        try:
            batch_template = json_decode_file(parameters.get('batch_template'))
        except:
            register_exception(prefix="The given batch template was not readable")
            raise
    else:
        batch_template = json_decode_file(CFG_BIBENCODE_TEMPLATE_BATCH_SUBMISSION)
    ## Handle the filepath
    file_storing_path = os.path.join(curdir, "files", str(user_info['uid']), "NewFile", 'filepath')
    try:
        fp = open(file_storing_path)
        fullpath = fp.read()
        fp.close()
        batch_template['input'] = fullpath
    except:
        register_exception(prefix="The file containing the path to the video was not readable")
        raise
    ## Handle the filename
    file_storing_name = os.path.join(curdir, "files", str(user_info['uid']), "NewFile", 'filename')
    try:
        fp = open(file_storing_name)
        filename = fp.read()
        fp.close()
        batch_template['bibdoc_master_docname'] = os.path.splitext(os.path.split(filename)[1])[0]
        batch_template['bibdoc_master_extension'] = os.path.splitext(filename)[1]
        batch_template['submission_filename'] = filename
    except:
        register_exception(prefix="The file containing the original filename of the video was not readable")
        raise
    ## Handle the aspect ratio
    if parameters.get('aspect'):
        try:
            file_storing_aspect = os.path.join(curdir, parameters.get('aspect'))
            fp = open(file_storing_aspect)
            aspect = fp.read()
            fp.close()
            batch_template['aspect'] = aspect
        except:
            ## (typo "ascpect" fixed in the registered message)
            register_exception(prefix="The file containing the aspect ratio of the video was not readable")
            raise
    else:
        batch_template['aspect'] = None
    ## Handle the title
    if parameters.get('title'):
        try:
            file_storing_title = os.path.join(curdir, parameters['title'])
            fp = open(file_storing_title)
            title = fp.read()
            fp.close()
            ## BUGFIX: the title read from the file was previously never
            ## stored in the batch template (only the 'else' branch set
            ## 'submission_title'):
            batch_template['submission_title'] = title
        except:
            register_exception(prefix="The file containing the title of the video was not readable")
            raise
    else:
        batch_template['submission_title'] = None
    ## Set the rest
    batch_template['notify_admin'] = CFG_SITE_ADMIN_EMAIL
    batch_template['notify_user'] = user_info['email']
    ## NOTE(review): 'sysno' is not defined anywhere in this function or its
    ## imports; legacy WebSubmit appears to inject it as a global at
    ## execution time -- confirm before refactoring.
    batch_template['recid'] = sysno
    timestamp = generate_timestamp()
    job_filename = "submission_%d_%s.job" % (sysno, timestamp)
    create_job_from_dictionary(batch_template, job_filename)
diff --git a/invenio/legacy/websubmit/functions/Withdraw_Approval_Request.py b/invenio/legacy/websubmit/functions/Withdraw_Approval_Request.py
index 3df0ed301..97d7ad609 100644
--- a/invenio/legacy/websubmit/functions/Withdraw_Approval_Request.py
+++ b/invenio/legacy/websubmit/functions/Withdraw_Approval_Request.py
@@ -1,388 +1,388 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""At the time of a "withdraw approval request " submission, register the
withdrawal of the request in the WebSubmit "Approvals" DB (sbmAPPROVAL).
"""
__revision__ = "$Id$"
import time
import sre_constants
import os
import cgi
import re
from invenio.websubmit_dblayer import get_simple_approval_status, \
update_approval_request_status
from invenio.legacy.websubmit.functions.Shared_Functions import ParamFromFile
from invenio.legacy.websubmit.config import InvenioWebSubmitFunctionError, \
InvenioWebSubmitFunctionStop
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_SITE_SUPPORT_EMAIL
def Withdraw_Approval_Request(parameters, curdir, form, user_info=None):
    """
    This function is used in a "withdraw approval request" submission
    in order to register the withdrawal of the request in the WebSubmit
    "Approvals" DB (sbmAPPROVAL).
    At the time of the approval request withdrawal, the document could be
    in one of several different approval "states" and depending upon that
    state, the action taken by this function differs. The states are as
    follows:
        * Approval of the document has previously been requested and it is
          still in the "waiting" state.
           -> In this case, the status of the document in the sbmAPPROVAL
              table is set to "withdrawn".
        * Approval for the document has never been requested.
           -> In this case, there is nothing to do.
        * Approval of the document has previously been requested, but the
          document was rejected.
           -> In this case, it's too late to withdraw the approval request
              and there is nothing left to do.
        * Approval of the document has previously been requested and it has
          been approved.
           -> In this case, it's too late to withdraw the approval request
              and there is nothing left to do.
        * Approval of the document has previously been requested, but the
          request withdrawn.
           -> In this case, there is nothing to do.
    @param categ_file_withd: (string) - some document types are
        separated into different categories, each of which has its own
        referee(s).
        In such document types, it's necessary to know the document-
        type's category in order to choose the referee.
        This parameter provides a means by which the category information
        can be extracted from a file in the current submission's working
        directory. It should therefore be a filename.
    @param categ_rnseek_withd: (string) - some document types are
        separated into different categories, each of which has its own
        referee(s).
        In such document types, it's necessary to know the document-
        type's category in order to choose the referee.
        This parameter provides a means by which the category information
        can be extracted from the document's reference number.
        It is in fact a string that will be compiled into a regexp and
        an attempt will be made to match it against the document's reference
        number starting from the left-most position.
        The only pre-requisite is that the segment in which the category is
        sought should be indicated with <CATEG> (note: the code matches the
        literal marker "<CATEG>", not "<CATEGORY>").
        Thus, an example might be as follows:
           ATL(-COM)?-<CATEG>-.+
        This would allow "PHYS" in the following reference number to be
        recognised as the category:
           ATL-COM-PHYS-2008-001
    @return: (string) - a message for the user.
    @Exceptions raised: + InvenioWebSubmitFunctionStop when the submission
                          should be halted.
                        + InvenioWebSubmitFunctionError when an unexpected
                          error has been encountered and execution cannot
                          continue.
    """
    ## Get the reference number (as global rn - sorry!) and the document type:
    global rn
    doctype = form['doctype']
    ## A string variable to contain any information that should be displayed
    ## in the user's browser:
    info_out = ""
    ########
    ## Get the parameters from the list:
    ########
    ## Get the name of the category file:
    #######
    try:
        ## If it has been provided, get the name of the file in which the
        ## category is stored:
        category_file = parameters["categ_file_withd"]
    except KeyError:
        ## No value given for the category file:
        category_file = None
    else:
        if category_file is not None:
            ## Keep only the basename, so the configured value cannot point
            ## outside of the submission's working directory:
            category_file = str(category_file)
            category_file = os.path.basename(category_file).strip()
            if category_file == "":
                category_file = None
    ########
    ## Get the regexp that is used to find the category in the report number:
    ########
    try:
        ## If it has been provided, get the regexp used for identifying
        ## a document-type's category from its reference number:
        category_rn_regexp = parameters["categ_rnseek_withd"]
    except KeyError:
        ## No value given for the category regexp:
        category_rn_regexp = None
    else:
        if category_rn_regexp is not None:
            category_rn_regexp = str(category_rn_regexp).strip()
        if category_rn_regexp == "":
            category_rn_regexp = None
    #######
    ## Resolve the document type's category:
    ##
    ## This is a long process. The end result is that the category is extracted
    ## either from a file in curdir, or from the report number.
    ## If it's taken from the report number, the admin must configure the
    ## function to accept a regular expression that is used to find the
    ## category in the report number.
    ##
    ## NOTE(review): the resolved `category` is never used after this
    ## resolution step within this function -- presumably kept for parity
    ## with the sibling approval functions; confirm before removing.
    ##
    if category_file is not None and category_rn_regexp is not None:
        ## It is not valid to have both a category file and a pattern
        ## describing how to extract the category from a report number.
        ## raise an InvenioWebSubmitFunctionError
        msg = "Error in Withdraw_Approval_Request function: received " \
              "instructions to search for the document's category in " \
              "both its report number AND in a category file. Could " \
              "not determine which to use - please notify " \
              "%(suppt-email)s." \
              % { 'suppt-email' : cgi.escape(CFG_SITE_SUPPORT_EMAIL), }
        raise InvenioWebSubmitFunctionError(msg)
    elif category_file is not None:
        ## Attempt to recover the category information from a file in the
        ## current submission's working directory:
        category = ParamFromFile("%s/%s" % (curdir, category_file))
        if category is not None:
            category = category.strip()
        if category in (None, ""):
            ## The category cannot be resolved.
            msg = "Error in Withdraw_Approval_Request function: received " \
                  "instructions to search for the document's category in " \
                  "a category file, but could not recover the category " \
                  "from that file. The approval request therefore cannot " \
                  "be withdrawn for the document. Please report this " \
                  "problem to %(suppt-email)s." \
                  % { 'suppt-email' : cgi.escape(CFG_SITE_SUPPORT_EMAIL), }
            raise InvenioWebSubmitFunctionError(msg)
    elif category_rn_regexp is not None:
        ## Attempt to recover the category information from the document's
        ## reference number using the regexp in category_rn_regexp:
        ##
        ## Does the category regexp contain the key-phrase "<CATEG>"?
        if category_rn_regexp.find("<CATEG>") != -1:
            ## Yes. Replace "<CATEG>" with "(?P<category>.+?)".
            ## For example, this:
            ##    ATL(-COM)?-<CATEG>-
            ## Will be transformed into this:
            ##    ATL(-COM)?-(?P<category>.+?)-
            category_rn_final_regexp = \
                category_rn_regexp.replace("<CATEG>", r"(?P<category>.+?)", 1)
        else:
            ## The regexp for category didn't contain "<CATEG>", but this is
            ## mandatory.
            msg = "Error in Withdraw_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression (no marker for " \
                  "the category was present.) Since the document's category " \
                  "therefore cannot be retrieved, its approval request " \
                  "cannot be withdrawn. Please report this problem to " \
                  "%(suppt-email)s." \
                  % { 'doctype' : cgi.escape(doctype),
                      'suppt-email' : cgi.escape(CFG_SITE_SUPPORT_EMAIL), }
            raise InvenioWebSubmitFunctionError(msg)
        ##
        try:
            ## Attempt to compile the regexp for finding the category:
            re_categ_from_rn = re.compile(category_rn_final_regexp)
        except sre_constants.error:
            ## The expression passed to this function could not be compiled
            ## into a regexp. Register this exception and raise an
            ## InvenioWebSubmitFunctionError:
            exception_prefix = "Error in Withdraw_Approval_Request function: " \
                               "The [%(doctype)s] submission has been " \
                               "configured to search for the document type's " \
                               "category in its reference number, using the " \
                               "following regexp: /%(regexp)s/. This regexp, " \
                               "however, could not be compiled correctly " \
                               "(created it from %(categ-search-term)s.)" \
                               % { 'doctype' : doctype, \
                                   'regexp' : category_rn_final_regexp, \
                                   'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Withdraw_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression. Since the " \
                  "document's category therefore cannot be retrieved, its " \
                  "approval request cannot be withdrawn. Please " \
                  "report this problem to %(suppt-email)s." \
                  % { 'doctype' : cgi.escape(doctype),
                      'suppt-email' : cgi.escape(CFG_SITE_SUPPORT_EMAIL), }
            raise InvenioWebSubmitFunctionError(msg)
        else:
            ## Now attempt to recover the category from the RN string:
            m_categ_from_rn = re_categ_from_rn.match(rn)
            if m_categ_from_rn is not None:
                ## The pattern matched in the string.
                ## Extract the category from the match:
                try:
                    category = m_categ_from_rn.group("category")
                except IndexError:
                    ## There was no "category" group. That group is mandatory.
                    exception_prefix = \
                        "Error in Withdraw_Approval_Request function: The " \
                        "[%(doctype)s] submission has been configured to " \
                        "search for the document type's category in its " \
                        "reference number using the following regexp: " \
                        "/%(regexp)s/. The search produced a match, but " \
                        "there was no \"category\" group in the match " \
                        "object although this group is mandatory. The " \
                        "regexp was compiled from the following string: " \
                        "[%(categ-search-term)s]." \
                        % { 'doctype' : doctype, \
                            'regexp' : category_rn_final_regexp, \
                            'categ-search-term' : category_rn_regexp, }
                    register_exception(prefix=exception_prefix)
                    msg = "Error in Withdraw_Approval_Request function: The " \
                          "[%(doctype)s] submission has been configured to " \
                          "search for the document type's category in its " \
                          "reference number, using a poorly formed search " \
                          "expression (there was no category marker). Since " \
                          "the document's category therefore cannot be " \
                          "retrieved, its approval request cannot be " \
                          "withdrawn. Please report this problem to " \
                          "%(suppt-email)s." \
                          % { 'doctype' : cgi.escape(doctype),
                              'suppt-email' : \
                                  cgi.escape(CFG_SITE_SUPPORT_EMAIL),}
                    raise InvenioWebSubmitFunctionError(msg)
                else:
                    category = category.strip()
                    if category == "":
                        ## The match produced an empty category string:
                        msg = "Error in Withdraw_Approval_Request function: " \
                              "The [%(doctype)s] submission has been " \
                              "configured to search for the document type's " \
                              "category in its reference number, but no " \
                              "category was found. The request for approval " \
                              "cannot be withdrawn. Please report this " \
                              "problem to %(suppt-email)s." \
                              % { 'doctype' : cgi.escape(doctype),
                                  'suppt-email' : \
                                      cgi.escape(CFG_SITE_SUPPORT_EMAIL),}
                        raise InvenioWebSubmitFunctionError(msg)
            else:
                ## No match. Cannot find the category and therefore cannot
                ## continue:
                msg = "Error in Withdraw_Approval_Request function: The " \
                      "[%(doctype)s] submission has been configured to " \
                      "search for the document type's category in its " \
                      "reference number, but no match was made. The request " \
                      "for approval cannot be withdrawn. Please report " \
                      "this problem to %(suppt-email)s." \
                      % { 'doctype' : cgi.escape(doctype),
                          'suppt-email' : cgi.escape(CFG_SITE_SUPPORT_EMAIL),}
                raise InvenioWebSubmitFunctionError(msg)
    else:
        ## The document type has no category.
        category = ""
    ##
    ## End of category recovery
    #######
    #######
    ##
    ## Query the "approvals" DB table to determine whether approval of this
    ## document has already been requested:
    approval_status = get_simple_approval_status(doctype, rn)
    if approval_status is None:
        ## Approval has never been requested for this document.
        ## One cannot withdraw an approval request that was never made.
        msg = """
<br />
<div>
<span style="color: red;">Note:</span> A request for the approval of the
document [%s] has never been made.<br />
There is nothing to do.
</div>
""" % cgi.escape(rn)
        raise InvenioWebSubmitFunctionStop(msg)
    elif approval_status.lower() == "approved":
        ## This document has already been approved. It's too late to withdraw
        ## the approval request.
        msg = """
<br />
<div>
<span style="color: red;">Note:</span> The document [%s] has already been
approved.<br />
It is too late to withdraw the approval request.<br />
If you believe this to be an error, please contact %s, quoting the<br />
document's report-number [%s] and describing the problem.
</div>
""" % (cgi.escape(rn), cgi.escape(CFG_SITE_SUPPORT_EMAIL), cgi.escape(rn))
        raise InvenioWebSubmitFunctionStop(msg)
    elif approval_status.lower() == "rejected":
        ## This document has already been rejected. It's too late to withdraw
        ## the approval request.
        msg = """
<br />
<div>
<span style="color: red;">Note:</span> The document [%s] has already been
rejected.<br />
It is too late to withdraw the approval request.<br />
If you believe this to be an error, please contact %s, quoting the<br />
document's report-number [%s] and describing the problem.
</div>
""" % (cgi.escape(rn), cgi.escape(CFG_SITE_SUPPORT_EMAIL), cgi.escape(rn))
        raise InvenioWebSubmitFunctionStop(msg)
    elif approval_status.lower() == "withdrawn":
        ## The approval request for this document has already been withdrawn.
        msg = """
<br />
<div>
<span style="color: red;">Note:</span> The approval request for the document
[%s] has already been withdrawn.<br />
There is nothing to do.
</div>
""" % cgi.escape(rn)
        raise InvenioWebSubmitFunctionStop(msg)
    elif approval_status.lower() == "waiting":
        ## Mark the approval request as withdrawn, recording who withdrew
        ## it and when in the request's "note" field:
        note = "Withdrawn by [%s]: %s\n#####\n" \
               % (cgi.escape(user_info['email']), \
                  cgi.escape(time.strftime("%d/%m/%Y %H:%M:%S", \
                                           time.localtime())))
        update_approval_request_status(doctype, \
                                       rn, \
                                       note=note, \
                                       status="withdrawn")
        info_out += """
<br />
<div>
The approval request for the document [%s] has been withdrawn.
</div>
""" % cgi.escape(rn)
    else:
        ## The document had an unrecognised "status". Raise an error.
        msg = "Error in Withdraw_Approval_Request function: The " \
              "[%(reportnum)s] document has an unknown approval status " \
              "(%(status)s). Unable to withdraw the request for its " \
              "approval. Please report this problem to the %(suppt-email)s." \
              % { 'reportnum' : cgi.escape(rn),
                  'status' : cgi.escape(approval_status),
                  'suppt-email' : cgi.escape(CFG_SITE_SUPPORT_EMAIL), }
        raise InvenioWebSubmitFunctionError(msg)
    ##
    ## Finished - return any message to be displayed on the user's screen.
    return info_out
diff --git a/invenio/legacy/websubmit/inveniounoconv.py b/invenio/legacy/websubmit/inveniounoconv.py
index 592ac4c0b..7006b4df4 100644
--- a/invenio/legacy/websubmit/inveniounoconv.py
+++ b/invenio/legacy/websubmit/inveniounoconv.py
@@ -1,1187 +1,1187 @@
#!@OPENOFFICE_PYTHON@
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Run-Unoconv-as-nobody wrapper.
"""
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; version 2 only
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software
### Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
### Copyright 2007-2010 Dag Wieers <dag@wieers.com>
from distutils.version import LooseVersion
import getopt
import glob
import os
import subprocess
import sys
import time
import signal
import errno
from invenio.flaskshell import *
from invenio.websubmit_file_converter import CFG_OPENOFFICE_TMPDIR
# PID file used to keep track of the standalone soffice listener process.
CFG_SOFFICE_PID = os.path.join(CFG_OPENOFFICE_TMPDIR, 'soffice.pid')
__version__ = "$Revision$"
# $Source$
VERSION = '0.6'
# Document families understood by the converter (see the fmts registry below).
doctypes = ('document', 'graphics', 'presentation', 'spreadsheet')
# NOTE(review): a "global" statement at module level is a no-op; kept for
# parity with upstream unoconv, where these names are bound by main-line code.
global convertor, office, ooproc, product
# Subprocess handle of the soffice listener we launch (None until started).
ooproc = None
# Exit status for the whole run; presumably updated by conversion code further
# down this file — confirm.
exitcode = 0
class Office:
    """Description of a single LibreOffice/OpenOffice installation.

    Records the filesystem locations of the installation: its base path,
    URE path, UNO program directory, pyuno library, soffice binary, and
    the python interpreter (plus optional python home) bundled with it.
    All values are stored verbatim; no validation is performed here.
    """

    def __init__(self, basepath, urepath, unopath, pyuno, binary, python, pythonhome):
        self.basepath, self.urepath, self.unopath = basepath, urepath, unopath
        self.pyuno, self.binary = pyuno, binary
        self.python, self.pythonhome = python, pythonhome

    def __str__(self):
        # The base path alone identifies the installation.
        return self.basepath

    # repr and str are intentionally identical (same as upstream unoconv).
    __repr__ = __str__
### The first thing we ought to do is find a suitable Office installation
### with a compatible pyuno library that we can import.
###
### See: http://user.services.openoffice.org/en/forum/viewtopic.php?f=45&t=36370&p=166783
def find_offices():
    """Probe well-known filesystem locations for Office installations.

    Candidate base paths come from $UNO_PATH when set, otherwise from
    platform-specific glob patterns.  A base path is kept only when a
    pyuno library and an soffice binary can be located under it; the
    bundled python interpreter and URE directory are optional.

    :return: list of Office objects describing usable installations.
    """
    ret = []
    extrapaths = []
    ### Try using UNO_PATH first (in many incarnations, we'll see what sticks)
    if 'UNO_PATH' in os.environ:
        extrapaths += [ os.environ['UNO_PATH'],
                        os.path.dirname(os.environ['UNO_PATH']),
                        os.path.dirname(os.path.dirname(os.environ['UNO_PATH'])) ]
    else:
        if os.name in ( 'nt', 'os2' ):
            if 'PROGRAMFILES' in os.environ.keys():
                extrapaths += glob.glob(os.environ['PROGRAMFILES']+'\\LibreOffice*') + \
                              glob.glob(os.environ['PROGRAMFILES']+'\\OpenOffice.org*')
            if 'PROGRAMFILES(X86)' in os.environ.keys():
                extrapaths += glob.glob(os.environ['PROGRAMFILES(X86)']+'\\LibreOffice*') + \
                              glob.glob(os.environ['PROGRAMFILES(X86)']+'\\OpenOffice.org*')
        elif os.name in ( 'mac', ) or sys.platform in ( 'darwin', ):
            extrapaths += [ '/Applications/LibreOffice.app/Contents',
                            '/Applications/NeoOffice.app/Contents',
                            '/Applications/OpenOffice.org.app/Contents' ]
        else:
            extrapaths += glob.glob('/usr/lib*/libreoffice*') + \
                          glob.glob('/usr/lib*/openoffice*') + \
                          glob.glob('/usr/lib*/ooo*') + \
                          glob.glob('/opt/libreoffice*') + \
                          glob.glob('/opt/openoffice*') + \
                          glob.glob('/opt/ooo*') + \
                          glob.glob('/usr/local/libreoffice*') + \
                          glob.glob('/usr/local/openoffice*') + \
                          glob.glob('/usr/local/ooo*') + \
                          glob.glob('/usr/local/lib/libreoffice*')
    ### Find a working set for python UNO bindings
    for basepath in extrapaths:
        ### Per-platform names of the artefacts we must locate.
        if os.name in ( 'nt', 'os2' ):
            officelibraries = ( 'pyuno.pyd', )
            officebinaries = ( 'soffice.exe' ,)
            pythonbinaries = ( 'python.exe', )
            pythonhomes = ()
        elif os.name in ( 'mac', ) or sys.platform in ( 'darwin', ):
            officelibraries = ( 'pyuno.so', 'pyuno.dylib' )
            officebinaries = ( 'soffice.bin', )
            pythonbinaries = ( 'python.bin', 'python' )
            pythonhomes = ( 'OOoPython.framework/Versions/*/lib/python*', )
        else:
            officelibraries = ( 'pyuno.so', )
            officebinaries = ( 'soffice.bin', )
            pythonbinaries = ( 'python.bin', 'python', )
            pythonhomes = ( 'python-core-*', )
        ### Older LibreOffice/OpenOffice and Windows use basis-link/ or basis/
        ### 'error' is a throw-away sentinel: it is overwritten as soon as a
        ### pyuno library is found; unmatched base paths are skipped through
        ### the for/else "continue" below.
        libpath = 'error'
        for basis in ( 'basis-link', 'basis', '' ):
            for lib in officelibraries:
                if os.path.isfile(os.path.join(basepath, basis, 'program', lib)):
                    libpath = os.path.join(basepath, basis, 'program')
                    officelibrary = os.path.join(libpath, lib)
                    info(3, "Found %s in %s" % (lib, libpath))
                    # Break the inner loop...
                    break
            # Continue if the inner loop wasn't broken.
            else:
                continue
            # Inner loop was broken, break the outer.
            break
        else:
            continue
        ### MacOSX have soffice binaries installed in MacOS subdirectory, not program
        unopath = 'error'
        for basis in ( 'basis-link', 'basis', '' ):
            for bin in officebinaries:
                if os.path.isfile(os.path.join(basepath, basis, 'program', bin)):
                    unopath = os.path.join(basepath, basis, 'program')
                    officebinary = os.path.join(unopath, bin)
                    info(3, "Found %s in %s" % (bin, unopath))
                    # Break the inner loop...
                    break
            # Continue if the inner loop wasn't broken.
            else:
                continue
            # Inner loop was broken, break the outer.
            break
        else:
            continue
        ### Windows/MacOSX does not provide or need a URE/lib directory ?
        urepath = ''
        for basis in ( 'basis-link', 'basis', '' ):
            for ure in ( 'ure-link', 'ure', 'URE', '' ):
                if os.path.isfile(os.path.join(basepath, basis, ure, 'lib', 'unorc')):
                    urepath = os.path.join(basepath, basis, ure)
                    info(3, "Found %s in %s" % ('unorc', os.path.join(urepath, 'lib')))
                    # Break the inner loop...
                    break
            # Continue if the inner loop wasn't broken.
            else:
                continue
            # Inner loop was broken, break the outer.
            break
        ### A bundled python home is optional; first glob match wins.
        pythonhome = None
        for home in pythonhomes:
            if glob.glob(os.path.join(libpath, home)):
                pythonhome = glob.glob(os.path.join(libpath, home))[0]
                info(3, "Found %s in %s" % (home, pythonhome))
                break
#        if not os.path.isfile(os.path.join(basepath, program, officebinary)):
#            continue
#        info(3, "Found %s in %s" % (officebinary, os.path.join(basepath, program)))
#        if not glob.glob(os.path.join(basepath, basis, program, 'python-core-*')):
#            continue
        ### One Office entry per bundled python found under unopath.
        for pythonbinary in pythonbinaries:
            if os.path.isfile(os.path.join(unopath, pythonbinary)):
                info(3, "Found %s in %s" % (pythonbinary, unopath))
                ret.append(Office(basepath, urepath, unopath, officelibrary, officebinary,
                                  os.path.join(unopath, pythonbinary), pythonhome))
        ### NOTE(review): this loop contains no "break", so the else-clause
        ### ALWAYS runs and a system-python entry is appended in addition to
        ### any bundled-python entries — this matches upstream unoconv.
        else:
            info(3, "Considering %s" % basepath)
            ret.append(Office(basepath, urepath, unopath, officelibrary, officebinary,
                              sys.executable, None))
    return ret
def office_environ(office):
    """Prepare os.environ and sys.path so that the given Office
    installation's pyuno bindings can be imported and soffice can be
    bootstrapped.  Behaviour-preserving restyle of the original."""
    env = os.environ
    program_dir = os.path.join(office.basepath, 'program')
    ### Prepend the program directory so crash_report is found first.
    env['PATH'] = program_dir + os.pathsep + env['PATH']
    ### officehelper.bootstrap() locates the soffice executable via UNO_PATH.
    env['UNO_PATH'] = office.unopath
    ### URE_BOOTSTRAP lets uno.getComponentContext() bootstrap a complete UNO
    ### environment; Windows/OS2 ship an .ini, other platforms a plain rc file.
    rcname = 'fundamental.ini' if os.name in ('nt', 'os2') else 'fundamentalrc'
    env['URE_BOOTSTRAP'] = 'vnd.sun.star.pathname:' + os.path.join(office.basepath, 'program', rcname)
    ### "import pyuno" needs libpyuno.so on the dynamic loader path; keep any
    ### pre-existing LD_LIBRARY_PATH entries after ours.
    ldpath = office.unopath + os.pathsep + os.path.join(office.urepath, 'lib')
    if 'LD_LIBRARY_PATH' in env:
        ldpath = ldpath + os.pathsep + env['LD_LIBRARY_PATH']
    env['LD_LIBRARY_PATH'] = ldpath
    if office.pythonhome:
        ### Bundled python: its library tree must shadow the system one.
        for entry in (os.path.join(office.pythonhome, 'lib'),
                      os.path.join(office.pythonhome, 'lib', 'lib-dynload'),
                      os.path.join(office.pythonhome, 'lib', 'lib-tk'),
                      os.path.join(office.pythonhome, 'lib', 'site-packages'),
                      office.unopath):
            sys.path.insert(0, entry)
    else:
        ### Still needed for system python using LibreOffice UNO bindings,
        ### although a system UNO binding is preferred in this case.
        sys.path.append(office.unopath)
def debug_office():
    """Dump the UNO-related environment variables to stderr.

    Bug fix: PATH used to be written to stdout (a bare "print") while
    every other variable went to stderr; it now goes to stderr as well,
    so the whole diagnostic report ends up on a single stream.
    """
    ### sys.stderr.write is used instead of "print >> sys.stderr" so this
    ### helper parses under both Python 2 and Python 3; output is identical.
    for var in ('URE_BOOTSTRAP', 'UNO_PATH', 'UNO_TYPES'):
        if var in os.environ:
            sys.stderr.write('%s=%s\n' % (var, os.environ[var]))
    ### PATH is reported unconditionally, as in the original.
    sys.stderr.write('PATH=%s\n' % os.environ['PATH'])
    for var in ('PYTHONHOME', 'PYTHONPATH', 'LD_LIBRARY_PATH'):
        if var in os.environ:
            sys.stderr.write('%s=%s\n' % (var, os.environ[var]))
def python_switch(office):
    """Re-execute this script under the python bundled with *office*.

    Exports PYTHONHOME/PYTHONPATH for the bundled interpreter (when one
    exists), points HOME at the Invenio OpenOffice tmpdir, then replaces
    the current process with office.python running the same argv.  Does
    not return on success (the process is replaced or exits).
    """
    if office.pythonhome:
        os.environ['PYTHONHOME'] = office.pythonhome
        os.environ['PYTHONPATH'] = os.path.join(office.pythonhome, 'lib') + os.pathsep + \
                                   os.path.join(office.pythonhome, 'lib', 'lib-dynload') + os.pathsep + \
                                   os.path.join(office.pythonhome, 'lib', 'lib-tk') + os.pathsep + \
                                   os.path.join(office.pythonhome, 'lib', 'site-packages') + os.pathsep + \
                                   office.unopath
    ### Invenio patch: soffice writes its user profile under $HOME.
    os.environ['HOME'] = CFG_OPENOFFICE_TMPDIR
    os.environ['UNO_PATH'] = office.unopath
    info(3, "-> Switching from %s to %s" % (sys.executable, office.python))
    if os.name in ('nt', 'os2'):
        ### os.execv is broken on Windows and can't properly parse command line
        ### arguments and executable name if they contain whitespaces. subprocess
        ### fixes that behavior.
        ret = subprocess.call([office.python] + sys.argv[0:])
        sys.exit(ret)
    else:
        ### Set LD_LIBRARY_PATH so that "import pyuno" finds libpyuno.so:
        if 'LD_LIBRARY_PATH' in os.environ:
            os.environ['LD_LIBRARY_PATH'] = office.unopath + os.pathsep + \
                                            os.path.join(office.urepath, 'lib') + os.pathsep + \
                                            os.environ['LD_LIBRARY_PATH']
        else:
            os.environ['LD_LIBRARY_PATH'] = office.unopath + os.pathsep + \
                                            os.path.join(office.urepath, 'lib')
        try:
            os.execvpe(office.python, [office.python, ] + sys.argv[0:], os.environ)
        except OSError:
            ### Mac OS X versions prior to 10.6 do not support execv in
            ### a process that contains multiple threads. Instead of
            ### re-executing in the current process, start a new one
            ### and cause the current process to exit. This isn't
            ### ideal since the new process is detached from the parent
            ### terminal and thus cannot easily be killed with ctrl-C,
            ### but it's better than not being able to autoreload at
            ### all.
            ### Unfortunately the errno returned in this case does not
            ### appear to be consistent, so we can't easily check for
            ### this error specifically.
            ret = os.spawnvpe(os.P_WAIT, office.python, [office.python, ] + sys.argv[0:], os.environ)
            sys.exit(ret)
class Fmt:
    """A single output format: its document family (doctype), short
    name, file extension, human-readable summary, and the name of the
    Office export filter that produces it."""

    def __init__(self, doctype, name, extension, summary, filter):
        # NOTE: "filter" shadows the builtin; kept for interface parity
        # with upstream unoconv.
        self.doctype, self.name = doctype, name
        self.extension, self.summary, self.filter = extension, summary, filter

    def __str__(self):
        # Human-oriented form, e.g. "Portable Document Format [.pdf]".
        return "%s [.%s]" % (self.summary, self.extension)

    def __repr__(self):
        # Compact form, e.g. "pdf/document".
        return "%s/%s" % (self.name, self.doctype)
class FmtList:
    """Registry of Fmt entries with simple lookup helpers."""

    def __init__(self):
        self.list = []

    def add(self, doctype, name, extension, summary, filter):
        """Register one format under the given document family."""
        self.list.append(Fmt(doctype, name, extension, summary, filter))

    def byname(self, name):
        ### All formats registered under the given short name.
        return [fmt for fmt in self.list if fmt.name == name]

    def byextension(self, extension):
        ### "extension" is expected to include the leading os.extsep
        ### (e.g. ".pdf").
        return [fmt for fmt in self.list
                if os.extsep + fmt.extension == extension]

    def bydoctype(self, doctype, name):
        ### Formats matching both the short name and the document family.
        return [fmt for fmt in self.list
                if fmt.name == name and fmt.doctype == doctype]

    def display(self, doctype):
        ### Human-readable listing of one family's formats, on stderr.
        print >> sys.stderr, "The following list of %s formats are currently available:\n" % doctype
        for fmt in self.list:
            if fmt.doctype == doctype:
                print >> sys.stderr, "  %-8s - %s" % (fmt.name, fmt)
        print >> sys.stderr
### ------------------------------------------------------------------
### Global registry of every output format this wrapper knows about,
### grouped by document family.  The trailing "### N" comments are the
### filter index annotations carried over from upstream unoconv.
### ------------------------------------------------------------------
fmts = FmtList()
### TextDocument
fmts.add('document', 'bib', 'bib', 'BibTeX', 'BibTeX_Writer') ### 22
fmts.add('document', 'doc', 'doc', 'Microsoft Word 97/2000/XP', 'MS Word 97') ### 29
fmts.add('document', 'doc6', 'doc', 'Microsoft Word 6.0', 'MS WinWord 6.0') ### 24
fmts.add('document', 'doc95', 'doc', 'Microsoft Word 95', 'MS Word 95') ### 28
fmts.add('document', 'docbook', 'xml', 'DocBook', 'DocBook File') ### 39
fmts.add('document', 'docx', 'docx', 'Microsoft Office Open XML', 'Office Open XML Text')
fmts.add('document', 'docx7', 'docx', 'Microsoft Office Open XML', 'MS Word 2007 XML')
fmts.add('document', 'fodt', 'fodt', 'OpenDocument Text (Flat XML)', 'OpenDocument Text Flat XML')
fmts.add('document', 'html', 'html', 'HTML Document (OpenOffice.org Writer)', 'HTML (StarWriter)') ### 3
fmts.add('document', 'latex', 'ltx', 'LaTeX 2e', 'LaTeX_Writer') ### 31
fmts.add('document', 'mediawiki', 'txt', 'MediaWiki', 'MediaWiki')
fmts.add('document', 'odt', 'odt', 'ODF Text Document', 'writer8') ### 10
fmts.add('document', 'ooxml', 'xml', 'Microsoft Office Open XML', 'MS Word 2003 XML') ### 11
fmts.add('document', 'ott', 'ott', 'Open Document Text', 'writer8_template') ### 21
fmts.add('document', 'pdb', 'pdb', 'AportisDoc (Palm)', 'AportisDoc Palm DB')
fmts.add('document', 'pdf', 'pdf', 'Portable Document Format', 'writer_pdf_Export') ### 18
fmts.add('document', 'psw', 'psw', 'Pocket Word', 'PocketWord File')
fmts.add('document', 'rtf', 'rtf', 'Rich Text Format', 'Rich Text Format') ### 16
fmts.add('document', 'sdw', 'sdw', 'StarWriter 5.0', 'StarWriter 5.0') ### 23
fmts.add('document', 'sdw4', 'sdw', 'StarWriter 4.0', 'StarWriter 4.0') ### 2
fmts.add('document', 'sdw3', 'sdw', 'StarWriter 3.0', 'StarWriter 3.0') ### 20
fmts.add('document', 'stw', 'stw', 'Open Office.org 1.0 Text Document Template', 'writer_StarOffice_XML_Writer_Template') ### 9
fmts.add('document', 'sxw', 'sxw', 'Open Office.org 1.0 Text Document', 'StarOffice XML (Writer)') ### 1
fmts.add('document', 'text', 'txt', 'Text Encoded', 'Text (encoded)') ### 26
fmts.add('document', 'txt', 'txt', 'Text', 'Text') ### 34
fmts.add('document', 'uot', 'uot', 'Unified Office Format text','UOF text') ### 27
fmts.add('document', 'vor', 'vor', 'StarWriter 5.0 Template', 'StarWriter 5.0 Vorlage/Template') ### 6
fmts.add('document', 'vor4', 'vor', 'StarWriter 4.0 Template', 'StarWriter 4.0 Vorlage/Template') ### 5
fmts.add('document', 'vor3', 'vor', 'StarWriter 3.0 Template', 'StarWriter 3.0 Vorlage/Template') ### 4
fmts.add('document', 'xhtml', 'html', 'XHTML Document', 'XHTML Writer File') ### 33
### WebDocument
fmts.add('web', 'etext', 'txt', 'Text Encoded (OpenOffice.org Writer/Web)', 'Text (encoded) (StarWriter/Web)') ### 14
fmts.add('web', 'html10', 'html', 'OpenOffice.org 1.0 HTML Template', 'writer_web_StarOffice_XML_Writer_Web_Template') ### 11
fmts.add('web', 'html', 'html', 'HTML Document', 'HTML') ### 2
fmts.add('web', 'html', 'html', 'HTML Document Template', 'writerweb8_writer_template') ### 13
fmts.add('web', 'mediawiki', 'txt', 'MediaWiki', 'MediaWiki_Web') ### 9
fmts.add('web', 'pdf', 'pdf', 'PDF - Portable Document Format', 'writer_web_pdf_Export') ### 10
fmts.add('web', 'sdw3', 'sdw', 'StarWriter 3.0 (OpenOffice.org Writer/Web)', 'StarWriter 3.0 (StarWriter/Web)') ### 3
fmts.add('web', 'sdw4', 'sdw', 'StarWriter 4.0 (OpenOffice.org Writer/Web)', 'StarWriter 4.0 (StarWriter/Web)') ### 4
fmts.add('web', 'sdw', 'sdw', 'StarWriter 5.0 (OpenOffice.org Writer/Web)', 'StarWriter 5.0 (StarWriter/Web)') ### 5
fmts.add('web', 'txt', 'txt', 'OpenOffice.org Text (OpenOffice.org Writer/Web)', 'writerweb8_writer') ### 12
fmts.add('web', 'text10', 'txt', 'OpenOffice.org 1.0 Text Document (OpenOffice.org Writer/Web)', 'writer_web_StarOffice_XML_Writer') ### 15
fmts.add('web', 'text', 'txt', 'Text (OpenOffice.org Writer/Web)', 'Text (StarWriter/Web)') ### 8
fmts.add('web', 'vor4', 'vor', 'StarWriter/Web 4.0 Template', 'StarWriter/Web 4.0 Vorlage/Template') ### 6
fmts.add('web', 'vor', 'vor', 'StarWriter/Web 5.0 Template', 'StarWriter/Web 5.0 Vorlage/Template') ### 7
### Spreadsheet
fmts.add('spreadsheet', 'csv', 'csv', 'Text CSV', 'Text - txt - csv (StarCalc)') ### 16
fmts.add('spreadsheet', 'dbf', 'dbf', 'dBASE', 'dBase') ### 22
fmts.add('spreadsheet', 'dif', 'dif', 'Data Interchange Format', 'DIF') ### 5
fmts.add('spreadsheet', 'fods', 'fods', 'OpenDocument Spreadsheet (Flat XML)', 'OpenDocument Spreadsheet Flat XML')
fmts.add('spreadsheet', 'html', 'html', 'HTML Document (OpenOffice.org Calc)', 'HTML (StarCalc)') ### 7
fmts.add('spreadsheet', 'ods', 'ods', 'ODF Spreadsheet', 'calc8') ### 15
fmts.add('spreadsheet', 'ooxml', 'xml', 'Microsoft Excel 2003 XML', 'MS Excel 2003 XML') ### 23
fmts.add('spreadsheet', 'ots', 'ots', 'ODF Spreadsheet Template', 'calc8_template') ### 14
fmts.add('spreadsheet', 'pdf', 'pdf', 'Portable Document Format', 'calc_pdf_Export') ### 34
fmts.add('spreadsheet', 'pxl', 'pxl', 'Pocket Excel', 'Pocket Excel')
fmts.add('spreadsheet', 'sdc', 'sdc', 'StarCalc 5.0', 'StarCalc 5.0') ### 31
fmts.add('spreadsheet', 'sdc4', 'sdc', 'StarCalc 4.0', 'StarCalc 4.0') ### 11
fmts.add('spreadsheet', 'sdc3', 'sdc', 'StarCalc 3.0', 'StarCalc 3.0') ### 29
fmts.add('spreadsheet', 'slk', 'slk', 'SYLK', 'SYLK') ### 35
fmts.add('spreadsheet', 'stc', 'stc', 'OpenOffice.org 1.0 Spreadsheet Template', 'calc_StarOffice_XML_Calc_Template') ### 2
fmts.add('spreadsheet', 'sxc', 'sxc', 'OpenOffice.org 1.0 Spreadsheet', 'StarOffice XML (Calc)') ### 3
fmts.add('spreadsheet', 'uos', 'uos', 'Unified Office Format spreadsheet', 'UOF spreadsheet') ### 9
fmts.add('spreadsheet', 'vor3', 'vor', 'StarCalc 3.0 Template', 'StarCalc 3.0 Vorlage/Template') ### 18
fmts.add('spreadsheet', 'vor4', 'vor', 'StarCalc 4.0 Template', 'StarCalc 4.0 Vorlage/Template') ### 19
fmts.add('spreadsheet', 'vor', 'vor', 'StarCalc 5.0 Template', 'StarCalc 5.0 Vorlage/Template') ### 20
fmts.add('spreadsheet', 'xhtml', 'xhtml', 'XHTML', 'XHTML Calc File') ### 26
fmts.add('spreadsheet', 'xls', 'xls', 'Microsoft Excel 97/2000/XP', 'MS Excel 97') ### 12
fmts.add('spreadsheet', 'xls5', 'xls', 'Microsoft Excel 5.0', 'MS Excel 5.0/95') ### 8
fmts.add('spreadsheet', 'xls95', 'xls', 'Microsoft Excel 95', 'MS Excel 95') ### 10
fmts.add('spreadsheet', 'xlt', 'xlt', 'Microsoft Excel 97/2000/XP Template', 'MS Excel 97 Vorlage/Template') ### 6
fmts.add('spreadsheet', 'xlt5', 'xlt', 'Microsoft Excel 5.0 Template', 'MS Excel 5.0/95 Vorlage/Template') ### 28
fmts.add('spreadsheet', 'xlt95', 'xlt', 'Microsoft Excel 95 Template', 'MS Excel 95 Vorlage/Template') ### 21
### Graphics
fmts.add('graphics', 'bmp', 'bmp', 'Windows Bitmap', 'draw_bmp_Export') ### 21
fmts.add('graphics', 'emf', 'emf', 'Enhanced Metafile', 'draw_emf_Export') ### 15
fmts.add('graphics', 'eps', 'eps', 'Encapsulated PostScript', 'draw_eps_Export') ### 48
fmts.add('graphics', 'fodg', 'fodg', 'OpenDocument Drawing (Flat XML)', 'OpenDocument Drawing Flat XML')
fmts.add('graphics', 'gif', 'gif', 'Graphics Interchange Format', 'draw_gif_Export') ### 30
fmts.add('graphics', 'html', 'html', 'HTML Document (OpenOffice.org Draw)', 'draw_html_Export') ### 37
fmts.add('graphics', 'jpg', 'jpg', 'Joint Photographic Experts Group', 'draw_jpg_Export') ### 3
fmts.add('graphics', 'met', 'met', 'OS/2 Metafile', 'draw_met_Export') ### 43
fmts.add('graphics', 'odd', 'odd', 'OpenDocument Drawing', 'draw8') ### 6
fmts.add('graphics', 'otg', 'otg', 'OpenDocument Drawing Template', 'draw8_template') ### 20
fmts.add('graphics', 'pbm', 'pbm', 'Portable Bitmap', 'draw_pbm_Export') ### 14
fmts.add('graphics', 'pct', 'pct', 'Mac Pict', 'draw_pct_Export') ### 41
fmts.add('graphics', 'pdf', 'pdf', 'Portable Document Format', 'draw_pdf_Export') ### 28
fmts.add('graphics', 'pgm', 'pgm', 'Portable Graymap', 'draw_pgm_Export') ### 11
fmts.add('graphics', 'png', 'png', 'Portable Network Graphic', 'draw_png_Export') ### 2
fmts.add('graphics', 'ppm', 'ppm', 'Portable Pixelmap', 'draw_ppm_Export') ### 5
fmts.add('graphics', 'ras', 'ras', 'Sun Raster Image', 'draw_ras_Export') ## 31
fmts.add('graphics', 'std', 'std', 'OpenOffice.org 1.0 Drawing Template', 'draw_StarOffice_XML_Draw_Template') ### 53
fmts.add('graphics', 'svg', 'svg', 'Scalable Vector Graphics', 'draw_svg_Export') ### 50
fmts.add('graphics', 'svm', 'svm', 'StarView Metafile', 'draw_svm_Export') ### 55
fmts.add('graphics', 'swf', 'swf', 'Macromedia Flash (SWF)', 'draw_flash_Export') ### 23
fmts.add('graphics', 'sxd', 'sxd', 'OpenOffice.org 1.0 Drawing', 'StarOffice XML (Draw)') ### 26
fmts.add('graphics', 'sxd3', 'sxd', 'StarDraw 3.0', 'StarDraw 3.0') ### 40
fmts.add('graphics', 'sxd5', 'sxd', 'StarDraw 5.0', 'StarDraw 5.0') ### 44
fmts.add('graphics', 'sxw', 'sxw', 'StarOffice XML (Draw)', 'StarOffice XML (Draw)')
fmts.add('graphics', 'tiff', 'tiff', 'Tagged Image File Format', 'draw_tif_Export') ### 13
fmts.add('graphics', 'vor', 'vor', 'StarDraw 5.0 Template', 'StarDraw 5.0 Vorlage') ### 36
fmts.add('graphics', 'vor3', 'vor', 'StarDraw 3.0 Template', 'StarDraw 3.0 Vorlage') ### 35
fmts.add('graphics', 'wmf', 'wmf', 'Windows Metafile', 'draw_wmf_Export') ### 8
fmts.add('graphics', 'xhtml', 'xhtml', 'XHTML', 'XHTML Draw File') ### 45
fmts.add('graphics', 'xpm', 'xpm', 'X PixMap', 'draw_xpm_Export') ### 19
### Presentation
fmts.add('presentation', 'bmp', 'bmp', 'Windows Bitmap', 'impress_bmp_Export') ### 15
fmts.add('presentation', 'emf', 'emf', 'Enhanced Metafile', 'impress_emf_Export') ### 16
fmts.add('presentation', 'eps', 'eps', 'Encapsulated PostScript', 'impress_eps_Export') ### 17
fmts.add('presentation', 'fodp', 'fodp', 'OpenDocument Presentation (Flat XML)', 'OpenDocument Presentation Flat XML')
fmts.add('presentation', 'gif', 'gif', 'Graphics Interchange Format', 'impress_gif_Export') ### 18
fmts.add('presentation', 'html', 'html', 'HTML Document (OpenOffice.org Impress)', 'impress_html_Export') ### 43
fmts.add('presentation', 'jpg', 'jpg', 'Joint Photographic Experts Group', 'impress_jpg_Export') ### 19
fmts.add('presentation', 'met', 'met', 'OS/2 Metafile', 'impress_met_Export') ### 20
fmts.add('presentation', 'odg', 'odg', 'ODF Drawing (Impress)', 'impress8_draw') ### 29
fmts.add('presentation', 'odp', 'odp', 'ODF Presentation', 'impress8') ### 9
fmts.add('presentation', 'otp', 'otp', 'ODF Presentation Template', 'impress8_template') ### 38
fmts.add('presentation', 'pbm', 'pbm', 'Portable Bitmap', 'impress_pbm_Export') ### 21
fmts.add('presentation', 'pct', 'pct', 'Mac Pict', 'impress_pct_Export') ### 22
fmts.add('presentation', 'pdf', 'pdf', 'Portable Document Format', 'impress_pdf_Export') ### 23
fmts.add('presentation', 'pgm', 'pgm', 'Portable Graymap', 'impress_pgm_Export') ### 24
fmts.add('presentation', 'png', 'png', 'Portable Network Graphic', 'impress_png_Export') ### 25
fmts.add('presentation', 'potm', 'potm', 'Microsoft PowerPoint 2007/2010 XML Template', 'Impress MS PowerPoint 2007 XML Template')
fmts.add('presentation', 'pot', 'pot', 'Microsoft PowerPoint 97/2000/XP Template', 'MS PowerPoint 97 Vorlage') ### 3
fmts.add('presentation', 'ppm', 'ppm', 'Portable Pixelmap', 'impress_ppm_Export') ### 26
fmts.add('presentation', 'pptx', 'pptx', 'Microsoft PowerPoint 2007/2010 XML', 'Impress MS PowerPoint 2007 XML') ### 36
fmts.add('presentation', 'pps', 'pps', 'Microsoft PowerPoint 97/2000/XP (Autoplay)', 'MS PowerPoint 97 Autoplay') ### 36
fmts.add('presentation', 'ppt', 'ppt', 'Microsoft PowerPoint 97/2000/XP', 'MS PowerPoint 97') ### 36
fmts.add('presentation', 'pwp', 'pwp', 'PlaceWare', 'placeware_Export') ### 30
fmts.add('presentation', 'ras', 'ras', 'Sun Raster Image', 'impress_ras_Export') ### 27
fmts.add('presentation', 'sda', 'sda', 'StarDraw 5.0 (OpenOffice.org Impress)', 'StarDraw 5.0 (StarImpress)') ### 8
fmts.add('presentation', 'sdd', 'sdd', 'StarImpress 5.0', 'StarImpress 5.0') ### 6
fmts.add('presentation', 'sdd3', 'sdd', 'StarDraw 3.0 (OpenOffice.org Impress)', 'StarDraw 3.0 (StarImpress)') ### 42
fmts.add('presentation', 'sdd4', 'sdd', 'StarImpress 4.0', 'StarImpress 4.0') ### 37
fmts.add('presentation', 'sxd', 'sxd', 'OpenOffice.org 1.0 Drawing (OpenOffice.org Impress)', 'impress_StarOffice_XML_Draw') ### 31
fmts.add('presentation', 'sti', 'sti', 'OpenOffice.org 1.0 Presentation Template', 'impress_StarOffice_XML_Impress_Template') ### 5
fmts.add('presentation', 'svg', 'svg', 'Scalable Vector Graphics', 'impress_svg_Export') ### 14
fmts.add('presentation', 'svm', 'svm', 'StarView Metafile', 'impress_svm_Export') ### 13
fmts.add('presentation', 'swf', 'swf', 'Macromedia Flash (SWF)', 'impress_flash_Export') ### 34
fmts.add('presentation', 'sxi', 'sxi', 'OpenOffice.org 1.0 Presentation', 'StarOffice XML (Impress)') ### 41
fmts.add('presentation', 'tiff', 'tiff', 'Tagged Image File Format', 'impress_tif_Export') ### 12
fmts.add('presentation', 'uop', 'uop', 'Unified Office Format presentation', 'UOF presentation') ### 4
fmts.add('presentation', 'vor', 'vor', 'StarImpress 5.0 Template', 'StarImpress 5.0 Vorlage') ### 40
fmts.add('presentation', 'vor3', 'vor', 'StarDraw 3.0 Template (OpenOffice.org Impress)', 'StarDraw 3.0 Vorlage (StarImpress)') ###1
fmts.add('presentation', 'vor4', 'vor', 'StarImpress 4.0 Template', 'StarImpress 4.0 Vorlage') ### 39
fmts.add('presentation', 'vor5', 'vor', 'StarDraw 5.0 Template (OpenOffice.org Impress)', 'StarDraw 5.0 Vorlage (StarImpress)') ### 2
fmts.add('presentation', 'wmf', 'wmf', 'Windows Metafile', 'impress_wmf_Export') ### 11
fmts.add('presentation', 'xhtml', 'xml', 'XHTML', 'XHTML Impress File') ### 33
fmts.add('presentation', 'xpm', 'xpm', 'X PixMap', 'impress_xpm_Export') ### 10
class Options:
    """Parsed command-line options for the unoconv wrapper.

    Parsing happens entirely in __init__, which may call sys.exit()
    directly for --help/--version/--show, for the Invenio-specific
    --remove/--kill actions, and on usage errors.
    """

    def __init__(self, args):
        """Parse *args* (sys.argv[1:] style) and normalise the options."""
        self.connection = None
        self.debug = False
        self.doctype = None
        self.exportfilter = []
        self.filenames = []
        self.format = None
        self.importfilter = ""
        self.listener = False
        self.nolaunch = False
        self.kill = False
        self.output = None
        self.password = None
        self.pipe = None
        self.port = '2002'
        self.server = 'localhost'
        self.showlist = False
        self.stdout = False
        self.template = None
        self.timeout = 6
        self.verbose = 0
        self.remove = None
        ### Get options from the commandline
        ### NOTE(review): short opts 'D' and 'L' are declared in the optstring
        ### but never handled below; long options 'export', 'import' and
        ### 'template' are declared without '=' even though their short forms
        ### take an argument — presumably upstream quirks, confirm before use.
        try:
            opts, args = getopt.getopt (args, 'c:Dd:e:f:hi:Llko:np:s:T:t:vr:',
                ['connection=', 'debug', 'doctype=', 'export', 'format=',
                 'help', 'import', 'listener', 'kill', 'no-launch', 'output=',
                 'outputpath', 'password=', 'pipe=', 'port=', 'server=',
                 'timeout=', 'show', 'stdout', 'template', 'verbose',
                 'version', 'remove='] )
        except getopt.error, exc:
            print 'unoconv: %s, try unoconv -h for a list of all the options' % str(exc)
            sys.exit(255)
        for opt, arg in opts:
            if opt in ['-h', '--help']:
                self.usage()
                print
                self.help()
                sys.exit(1)
            elif opt in ['-c', '--connection']:
                self.connection = arg
            elif opt in ['--debug']:
                self.debug = True
            elif opt in ['-d', '--doctype']:
                self.doctype = arg
            elif opt in ['-e', '--export']:
                ### Export filter options arrive as name=value pairs and are
                ### converted to UNO PropertyValue objects (bool/int/str).
                l = arg.split('=')
                if len(l) == 2:
                    (name, value) = l
                    if value in ('True', 'true'):
                        self.exportfilter.append( PropertyValue( name, 0, True, 0 ) )
                    elif value in ('False', 'false'):
                        self.exportfilter.append( PropertyValue( name, 0, False, 0 ) )
                    else:
                        try:
                            self.exportfilter.append( PropertyValue( name, 0, int(value), 0 ) )
                        except ValueError:
                            self.exportfilter.append( PropertyValue( name, 0, value, 0 ) )
                else:
                    print >> sys.stderr, 'Warning: Option %s cannot be parsed, ignoring.' % arg
#                self.exportfilter = arg
            elif opt in ['-f', '--format']:
                self.format = arg
            elif opt in ['-i', '--import']:
                self.importfilter = arg
            elif opt in ['-l', '--listener']:
                self.listener = True
            elif opt in ['-k', '--kill']:
                self.kill = True
            elif opt in ['-n', '--no-launch']:
                self.nolaunch = True
            elif opt in ['-o', '--output']:
                self.output = arg
            elif opt in ['--outputpath']:
                print >> sys.stderr, 'Warning: This option is deprecated by --output.'
                self.output = arg
            elif opt in ['--password']:
                self.password = arg
            elif opt in ['--pipe']:
                self.pipe = arg
            elif opt in ['-p', '--port']:
                self.port = arg
            elif opt in ['-s', '--server']:
                self.server = arg
            elif opt in ['--show']:
                self.showlist = True
            elif opt in ['--stdout']:
                self.stdout = True
            elif opt in ['-t', '--template']:
                self.template = arg
            elif opt in ['-T', '--timeout']:
                # May raise ValueError on a non-integer argument.
                self.timeout = int(arg)
            elif opt in ['-v', '--verbose']:
                self.verbose = self.verbose + 1
            elif opt in ['-r', '--remove']:
                self.remove = arg
            elif opt in ['--version']:
                self.version()
                sys.exit(255)
        ### Enable verbosity
        if self.verbose >= 2:
            print >> sys.stderr, 'Verbosity set to level %d' % self.verbose
        self.filenames = args
        ### Invenio extension: -r/--remove deletes a file left behind by
        ### OpenOffice and exits immediately.
        if self.remove:
            if os.path.exists(self.remove):
                os.remove(self.remove)
                print >> sys.stderr, "%s file created by OpenOffice was successfully removed." % self.remove
                sys.stderr.flush()
            sys.exit(0)
        ### Invenio extension: -k/--kill terminates any running soffice and
        ### exits.  NOTE(review): relies on the module-level 'office' global
        ### being set before Options() is constructed — confirm call order.
        if self.kill:
            from invenio.shellutils import run_shell_command
            run_shell_command('killall %s', [os.path.basename(office.binary)])
            time.sleep(1)
            run_shell_command('killall -9 %s', [os.path.basename(office.binary)])
            print >> sys.stderr, 'soffice.bin was hopefully already killed.'
            sys.exit(0)
        if not self.listener and not self.showlist and self.doctype != 'list' and not self.filenames:
            print >> sys.stderr, 'unoconv: you have to provide a filename as argument'
            print >> sys.stderr, 'Try `unoconv -h\' for more information.'
            sys.exit(255)
        ### Set connection string
        if not self.connection:
            if not self.pipe:
                self.connection = "socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (self.server, self.port)
#                self.connection = "socket,host=%s,port=%s;urp;" % (self.server, self.port)
            else:
                self.connection = "pipe,name=%s;urp;StarOffice.ComponentContext" % (self.pipe)
        ### Make it easier for people to use a doctype (first letter is enough)
        if self.doctype:
            for doctype in doctypes:
                if doctype.startswith(self.doctype):
                    self.doctype = doctype
        ### Check if the user request to see the list of formats
        if self.showlist or self.format == 'list':
            if self.doctype:
                fmts.display(self.doctype)
            else:
                for t in doctypes:
                    fmts.display(t)
            sys.exit(0)
        ### If no format was specified, probe it or provide it
        ### (e.g. a script installed as "unoconv2pdf" implies format "pdf").
        if not self.format:
            l = sys.argv[0].split('2')
            if len(l) == 2:
                self.format = l[1]
            else:
                self.format = 'pdf'

    def version(self):
        """Print version/platform information to stdout.

        NOTE(review): uses the module-level 'uno' and 'UnoProps' names,
        which are imported elsewhere in this script after the office
        environment is set up — confirm they are bound before calling.
        """
        ### Get office product information
        product = uno.getComponentContext().ServiceManager.createInstance("com.sun.star.configuration.ConfigurationProvider").createInstanceWithArguments("com.sun.star.configuration.ConfigurationAccess", UnoProps(nodepath="/org.openoffice.Setup/Product"))
        print 'unoconv %s' % VERSION
        print 'Written by Dag Wieers <dag@wieers.com>'
        print 'Patched to run within Invenio by <info@invenio-software.org>'
        print 'Homepage at http://dag.wieers.com/home-made/unoconv/'
        print
        print 'platform %s/%s' % (os.name, sys.platform)
        print 'python %s' % sys.version
        print product.ooName, product.ooSetupVersion
        print
        print 'build revision $Rev$'

    def usage(self):
        """Print the one-line usage summary to stderr."""
        print >> sys.stderr, 'usage: unoconv [options] file [file2 ..]'

    def help(self):
        """Print the detailed option help to stderr."""
        print >> sys.stderr, '''Convert from and to any format supported by LibreOffice

unoconv options:
  -c, --connection=string  use a custom connection string
  -d, --doctype=type       specify document type
                           (document, graphics, presentation, spreadsheet)
  -e, --export=name=value  set export filter options
                           eg. -e PageRange=1-2
  -f, --format=format      specify the output format
  -i, --import=string      set import filter option string
                           eg. -i utf8
  -l, --listener           start a permanent listener to use by unoconv clients
  -k, --kill               kill any listener on the local machine (Invenio)
  -r, --remove=filename    remove a file created by LibreOffice (Invenio)
  -n, --no-launch          fail if no listener is found (default: launch one)
  -o, --output=name        output basename, filename or directory
      --pipe=name          alternative method of connection using a pipe
  -p, --port=port          specify the port (default: 2002)
                           to be used by client or listener
      --password=string    provide a password to decrypt the document
  -s, --server=server      specify the server address (default: localhost)
                           to be used by client or listener
      --show               list the available output formats
      --stdout             write output to stdout
  -t, --template=file      import the styles from template (.ott)
  -T, --timeout=secs       timeout after secs if connection to listener fails
  -v, --verbose            be more and more verbose (-vvv for debugging)
'''
class Convertor:
def __init__(self):
global exitcode, ooproc, office, product
unocontext = None
### Do the LibreOffice component dance
self.context = uno.getComponentContext()
self.svcmgr = self.context.ServiceManager
resolver = self.svcmgr.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", self.context)
### Test for an existing connection
info(3, 'Connection type: %s' % op.connection)
try:
unocontext = resolver.resolve("uno:%s" % op.connection)
except NoConnectException, e:
# info(3, "Existing listener not found.\n%s" % e)
info(3, "Existing listener not found.")
if op.nolaunch:
die(113, "Existing listener not found. Unable start listener by parameters. Aborting.")
### Start our own OpenOffice instance
info(3, "Launching our own listener using %s." % office.binary)
try:
product = self.svcmgr.createInstance("com.sun.star.configuration.ConfigurationProvider").createInstanceWithArguments("com.sun.star.configuration.ConfigurationAccess", UnoProps(nodepath="/org.openoffice.Setup/Product"))
if product.ooName != "LibreOffice" or LooseVersion(product.ooSetupVersion) <= LooseVersion('3.3'):
ooproc = subprocess.Popen([office.binary, "-headless", "-invisible", "-nocrashreport", "-nodefault", "-nofirststartwizard", "-nologo", "-norestore", "-accept=%s" % op.connection], env=os.environ)
else:
ooproc = subprocess.Popen([office.binary, "--headless", "--invisible", "--nocrashreport", "--nodefault", "--nofirststartwizard", "--nologo", "--norestore", "--accept=%s" % op.connection], env=os.environ)
info(2, '%s listener successfully started. (pid=%s)' % (product.ooName, ooproc.pid))
### Try connection to it for op.timeout seconds (flakky OpenOffice)
timeout = 0
while timeout <= op.timeout:
### Is it already/still running ?
retcode = ooproc.poll()
if retcode != None:
info(3, "Process %s (pid=%s) exited with %s." % (office.binary, ooproc.pid, retcode))
break
try:
unocontext = resolver.resolve("uno:%s" % op.connection)
break
except NoConnectException:
time.sleep(0.5)
timeout += 0.5
except:
raise
else:
error("Failed to connect to %s (pid=%s) in %d seconds.\n%s" % (office.binary, ooproc.pid, op.timeout, e))
except Exception, e:
raise
error("Launch of %s failed.\n%s" % (office.binary, e))
if not unocontext:
die(251, "Unable to connect or start own listener. Aborting.")
### And some more LibreOffice magic
unosvcmgr = unocontext.ServiceManager
self.desktop = unosvcmgr.createInstanceWithContext("com.sun.star.frame.Desktop", unocontext)
self.cwd = unohelper.systemPathToFileUrl( os.getcwd() )
### List all filters
# self.filters = unosvcmgr.createInstanceWithContext( "com.sun.star.document.FilterFactory", unocontext)
# for filter in self.filters.getElementNames():
# print filter
# #print dir(filter), dir(filter.format)
def getformat(self, inputfn):
doctype = None
### Get the output format from mapping
if op.doctype:
outputfmt = fmts.bydoctype(op.doctype, op.format)
else:
outputfmt = fmts.byname(op.format)
if not outputfmt:
outputfmt = fmts.byextension(os.extsep + op.format)
### If no doctype given, check list of acceptable formats for input file ext doctype
### FIXME: This should go into the for-loop to match each individual input filename
if outputfmt:
inputext = os.path.splitext(inputfn)[1]
inputfmt = fmts.byextension(inputext)
if inputfmt:
for fmt in outputfmt:
if inputfmt[0].doctype == fmt.doctype:
doctype = inputfmt[0].doctype
outputfmt = fmt
break
else:
outputfmt = outputfmt[0]
# print >> sys.stderr, 'unoconv: format `%s\' is part of multiple doctypes %s, selecting `%s\'.' % (format, [fmt.doctype for fmt in outputfmt], outputfmt[0].doctype)
else:
outputfmt = outputfmt[0]
### No format found, throw error
if not outputfmt:
if doctype:
print >> sys.stderr, 'unoconv: format [%s/%s] is not known to unoconv.' % (op.doctype, op.format)
else:
print >> sys.stderr, 'unoconv: format [%s] is not known to unoconv.' % op.format
die(1)
return outputfmt
def convert(self, inputfn):
global exitcode
document = None
outputfmt = self.getformat(inputfn)
if op.verbose > 0:
print >> sys.stderr, 'Input file:', inputfn
if not os.path.exists(inputfn):
print >> sys.stderr, 'unoconv: file `%s\' does not exist.' % inputfn
exitcode = 1
try:
### Import phase
phase = "import"
### Load inputfile
inputprops = UnoProps(Hidden=True, ReadOnly=True, UpdateDocMode=QUIET_UPDATE, FilterOptions=op.importfilter)
# if op.password:
# info = UnoProps(algorithm-name="PBKDF2", salt="salt", iteration-count=1024, hash="hash")
# inputprops += UnoProps(ModifyPasswordInfo=info)
inputurl = unohelper.absolutize(self.cwd, unohelper.systemPathToFileUrl(inputfn))
# print dir(self.desktop)
document = self.desktop.loadComponentFromURL( inputurl , "_blank", 0, inputprops )
if not document:
raise UnoException("The document '%s' could not be opened." % inputurl, None)
### Import style template
phase = "import-style"
if op.template:
if os.path.exists(op.template):
info(1, "Template file: %s" % op.template)
templateprops = UnoProps(OverwriteStyles=True)
templateurl = unohelper.absolutize(self.cwd, unohelper.systemPathToFileUrl(op.template))
document.StyleFamilies.loadStylesFromURL(templateurl, templateprops)
else:
print >> sys.stderr, 'unoconv: template file `%s\' does not exist.' % op.template
exitcode = 1
### Update document links
phase = "update-links"
try:
document.updateLinks()
except AttributeError:
# the document doesn't implement the XLinkUpdate interface
pass
### Update document indexes
phase = "update-indexes"
try:
document.refresh()
indexes = document.getDocumentIndexes()
except AttributeError:
# the document doesn't implement the XRefreshable and/or
# XDocumentIndexesSupplier interfaces
pass
else:
for i in range(0, indexes.getCount()):
indexes.getByIndex(i).update()
info(1, "Selected output format: %s" % outputfmt)
info(2, "Selected office filter: %s" % outputfmt.filter)
info(2, "Used doctype: %s" % outputfmt.doctype)
### Export phase
phase = "export"
outputprops = UnoProps(FilterName=outputfmt.filter, OutputStream=OutputStream(), Overwrite=True)
# PropertyValue( "FilterData" , 0, ( PropertyValue( "SelectPdfVersion" , 0, 1 , uno.getConstantByName( "com.sun.star.beans.PropertyState.DIRECT_VALUE" ) ) ), uno.getConstantByName( "com.sun.star.beans.PropertyState.DIRECT_VALUE" ) ),
### Cannot use UnoProps for FilterData property
if op.exportfilter:
outputprops += ( PropertyValue( "FilterData", 0, uno.Any("[]com.sun.star.beans.PropertyValue", tuple( op.exportfilter ), ), 0 ), )
if outputfmt.filter == 'Text (encoded)':
outputprops += UnoProps(FilterOptions="UTF8, LF")
elif outputfmt.filter == 'Text':
outputprops += UnoProps(FilterOptions="UTF8")
elif outputfmt.filter == 'Text - txt - csv (StarCalc)':
outputprops += UnoProps(FilterOptions="44,34,0")
elif outputfmt.filter in ('writer_pdf_Export', 'impress_pdf_Export', 'calc_pdf_Export', 'draw_pdf_Export'):
outputprops += UnoProps(SelectPdfVersion=1)
if not op.stdout:
(outputfn, ext) = os.path.splitext(inputfn)
if not op.output:
outputfn = outputfn + os.extsep + outputfmt.extension
elif os.path.isdir(op.output):
outputfn = os.path.join(op.output, os.path.basename(outputfn) + os.extsep + outputfmt.extension)
elif len(op.filenames) > 1:
outputfn = op.output + os.extsep + outputfmt.extension
else:
outputfn = op.output
outputurl = unohelper.absolutize( self.cwd, unohelper.systemPathToFileUrl(outputfn) )
info(1, "Output file: %s" % outputfn)
else:
outputurl = "private:stream"
try:
document.storeToURL(outputurl, tuple(outputprops) )
except IOException, e:
- from invenio.errorlib import get_pretty_traceback
+ from invenio.ext.logging import get_pretty_traceback
print >> sys.stderr, get_pretty_traceback()
raise UnoException("Unable to store document to %s with properties %s. Exception: %s" % (outputurl, outputprops, e), None)
phase = "dispose"
document.dispose()
document.close(True)
except SystemError, e:
error("unoconv: SystemError during %s phase: %s" % (phase, e))
exitcode = 1
except RuntimeException, e:
error("unoconv: RuntimeException during %s phase: Office probably died. %s" % (phase, e))
exitcode = 6
except DisposedException, e:
error("unoconv: DisposedException during %s phase: Office probably died. %s" % (phase, e))
exitcode = 7
except IllegalArgumentException, e:
error("UNO IllegalArgument during %s phase: Source file cannot be read. %s" % (phase, e))
exitcode = 8
except IOException, e:
# for attr in dir(e): print '%s: %s', (attr, getattr(e, attr))
error("unoconv: IOException during %s phase: %s" % (phase, e.Message))
exitcode = 3
except CannotConvertException, e:
# for attr in dir(e): print '%s: %s', (attr, getattr(e, attr))
error("unoconv: CannotConvertException during %s phase: %s" % (phase, e.Message))
exitcode = 4
except UnoException, e:
if hasattr(e, 'ErrCode'):
error("unoconv: UnoException during %s phase in %s (ErrCode %d)" % (phase, repr(e.__class__), e.ErrCode))
exitcode = e.ErrCode
pass
if hasattr(e, 'Message'):
error("unoconv: UnoException during %s phase: %s" % (phase, e.Message))
exitcode = 5
else:
error("unoconv: UnoException during %s phase in %s" % (phase, repr(e.__class__)))
exitcode = 2
pass
class Listener:
    """Start (or detect) a permanent office listener on op.server:op.port.

    Used by `unoconv --listener`.  If a listener already answers on the
    configured UNO connection string nothing is launched; otherwise the
    office binary is started in the foreground via subprocess.call().
    Mutates the module global `product`.
    """
    def __init__(self):
        global product
        info(1, "Start listener on %s:%s" % (op.server, op.port))
        self.context = uno.getComponentContext()
        self.svcmgr = self.context.ServiceManager
        try:
            resolver = self.svcmgr.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", self.context)
            # Query the office product name/version to pick the right CLI flags.
            product = self.svcmgr.createInstance("com.sun.star.configuration.ConfigurationProvider").createInstanceWithArguments("com.sun.star.configuration.ConfigurationAccess", UnoProps(nodepath="/org.openoffice.Setup/Product"))
            try:
                unocontext = resolver.resolve("uno:%s" % op.connection)
            except NoConnectException, e:
                # No listener answered: fall through and launch one below.
                pass
            else:
                info(1, "Existing %s listener found, nothing to do." % product.ooName)
                return
            # OpenOffice.org and LibreOffice <= 3.3 want single-dash options.
            if product.ooName != "LibreOffice" or LooseVersion(product.ooSetupVersion) <= LooseVersion('3.3'):
                subprocess.call([office.binary, "-headless", "-invisible", "-nocrashreport", "-nodefault", "-nologo", "-nofirststartwizard", "-norestore", "-accept=%s" % op.connection], env=os.environ)
            else:
                subprocess.call([office.binary, "--headless", "--invisible", "--nocrashreport", "--nodefault", "--nologo", "--nofirststartwizard", "--norestore", "--accept=%s" % op.connection], env=os.environ)
        except Exception, e:
            error("Launch of %s failed.\n%s" % (office.binary, e))
        else:
            # try-else: runs when subprocess.call() returned without raising
            # (i.e. the launched listener terminated cleanly).
            info(1, "Existing %s listener found, nothing to do." % product.ooName)
def error(msg):
    """Write *msg* (plus a newline) to standard error."""
    sys.stderr.write('%s\n' % msg)
def info(level, msg):
    """Emit an informational message honouring the verbosity options.

    Before option parsing (no global `op` yet) nothing is printed.
    Level-3+ messages become DEBUG lines on stderr when -vvv was given;
    otherwise messages at or below the verbosity go to stdout, or to
    stderr when stdout is reserved for document output (--stdout).
    """
    if 'op' not in globals():
        # Options not parsed yet: stay silent.
        return
    if op.verbose >= 3 and level >= 3:
        sys.stderr.write('DEBUG: %s\n' % msg)
    elif not op.stdout and level <= op.verbose:
        sys.stdout.write('%s\n' % msg)
    elif level <= op.verbose:
        sys.stderr.write('%s\n' % msg)
def die(ret, msg=None):
"Print optional error and exit with errorcode"
global convertor, ooproc, office
if msg:
error('Error: %s' % msg)
### Did we start our own listener instance ?
if not op.listener and ooproc and convertor:
### If there is a GUI now attached to the instance, disable listener
if convertor.desktop.getCurrentFrame():
info(2, 'Trying to stop %s GUI listener.' % product.ooName)
try:
if product.ooName != "LibreOffice" or product.ooSetupVersion <= 3.3:
subprocess.Popen([office.binary, "-headless", "-invisible", "-nocrashreport", "-nodefault", "-nofirststartwizard", "-nologo", "-norestore", "-unaccept=%s" % op.connection], env=os.environ)
else:
subprocess.Popen([office.binary, "--headless", "--invisible", "--nocrashreport", "--nodefault", "--nofirststartwizard", "--nologo", "--norestore", "--unaccept=%s" % op.connection], env=os.environ)
ooproc.wait()
info(2, '%s listener successfully disabled.' % product.ooName)
except Exception, e:
error("Terminate using %s failed.\n%s" % (office.binary, e))
### If there is no GUI attached to the instance, terminate instance
else:
info(3, 'Terminating %s instance.' % product.ooName)
try:
convertor.desktop.terminate()
except DisposedException:
info(2, '%s instance unsuccessfully closed, sending TERM signal.' % product.ooName)
try:
ooproc.terminate()
except AttributeError:
os.kill(ooproc.pid, 15)
info(3, 'Waiting for %s instance to exit.' % product.ooName)
ooproc.wait()
### LibreOffice processes may get stuck and we have to kill them
### Is it still running ?
if ooproc.poll() == None:
info(1, '%s instance still running, please investigate...' % product.ooName)
ooproc.wait()
info(2, '%s instance unsuccessfully terminated, sending KILL signal.' % product.ooName)
try:
ooproc.kill()
except AttributeError:
os.kill(ooproc.pid, 9)
info(3, 'Waiting for %s with pid %s to disappear.' % (ooproc.pid, product.ooName))
ooproc.wait()
# allow Python GC to garbage collect pyuno object *before* exit call
# which avoids random segmentation faults --vpa
convertor = None
sys.exit(ret)
def main():
    """Entry point: run a listener and/or convert every input file.

    Sets the module globals `convertor` (the shared Convertor instance,
    reset to None first) and `exitcode` on connection failure.
    """
    global convertor, exitcode
    convertor = None
    try:
        if op.listener:
            listener = Listener()
        if op.filenames:
            convertor = Convertor()
            for inputfn in op.filenames:
                convertor.convert(inputfn)
    except NoConnectException, e:
        error("unoconv: could not find an existing connection to LibreOffice at %s:%s." % (op.server, op.port))
        if op.connection:
            info(0, "Please start an LibreOffice instance on server '%s' by doing:\n\n unoconv --listener --server %s --port %s\n\nor alternatively:\n\n soffice -nologo -nodefault -accept=\"%s\"" % (op.server, op.server, op.port, op.connection))
        else:
            info(0, "Please start an LibreOffice instance on server '%s' by doing:\n\n unoconv --listener --server %s --port %s\n\nor alternatively:\n\n soffice -nologo -nodefault -accept=\"socket,host=%s,port=%s;urp;\"" % (op.server, op.server, op.port, op.server, op.port))
            # NOTE(review): this second hint duplicates the one above; the
            # nesting (inside the else branch) is assumed from the flow of the
            # surrounding code -- TODO confirm against upstream.
            info(0, "Please start an soffice instance on server '%s' by doing:\n\n soffice -nologo -nodefault -accept=\"socket,host=localhost,port=%s;urp;\"" % (op.server, op.port))
        exitcode = 1
    # except UnboundLocalError:
    #    die(252, "Failed to connect to remote listener.")
    except OSError:
        error("Warning: failed to launch Office suite. Aborting.")
### Main entrance
if __name__ == '__main__':
os.environ['HOME'] = CFG_OPENOFFICE_TMPDIR
exitcode = 0
info(3, 'sysname=%s, platform=%s, python=%s, python-version=%s' % (os.name, sys.platform, sys.executable, sys.version))
for of in find_offices():
if of.python != sys.executable and not sys.executable.startswith(of.basepath):
python_switch(of)
office_environ(of)
# debug_office()
try:
import uno, unohelper
office = of
break
except:
# debug_office()
print >> sys.stderr, "unoconv: Cannot find a suitable pyuno library and python binary combination in %s" % of
print >> sys.stderr, "ERROR:", sys.exc_info()[1]
print >> sys.stderr
else:
# debug_office()
print >> sys.stderr, "unoconv: Cannot find a suitable office installation on your system."
print >> sys.stderr, "ERROR: Please locate your office installation and send your feedback to:"
print >> sys.stderr, " http://github.com/dagwieers/unoconv/issues"
sys.exit(1)
### Now that we have found a working pyuno library, let's import some classes
from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException
from com.sun.star.document.UpdateDocMode import QUIET_UPDATE
from com.sun.star.lang import DisposedException, IllegalArgumentException
from com.sun.star.io import IOException, XOutputStream
from com.sun.star.script import CannotConvertException
from com.sun.star.uno import Exception as UnoException
from com.sun.star.uno import RuntimeException
### And now that we have those classes, build on them
class OutputStream( unohelper.Base, XOutputStream ):
def __init__( self ):
self.closed = 0
def closeOutput(self):
self.closed = 1
def writeBytes( self, seq ):
sys.stdout.write( seq.value )
def flush( self ):
pass
def UnoProps(**args):
props = []
for key in args:
prop = PropertyValue()
prop.Name = key
prop.Value = args[key]
props.append(prop)
return tuple(props)
op = Options(sys.argv[1:])
info(2, "Using office base path: %s" % office.basepath)
info(2, "Using office binary path: %s" % office.unopath)
try:
main()
except KeyboardInterrupt, e:
die(6, 'Exiting on user request')
except:
- from invenio.errorlib import register_exception
+ from invenio.ext.logging import register_exception
register_exception(alert_admin=True)
die(exitcode)
diff --git a/invenio/legacy/websubmit/web/publiline.py b/invenio/legacy/websubmit/web/publiline.py
index 2454121d5..9ae928a1c 100644
--- a/invenio/legacy/websubmit/web/publiline.py
+++ b/invenio/legacy/websubmit/web/publiline.py
@@ -1,1884 +1,1884 @@
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
publiline_complex.py -- implements the complex document-approval workflow.
Actors in this process are:
1. author -- submits the document for approval
2. editorial board member
3. referee
Remember to define the corresponding access-control roles.
"""
__revision__ = "$Id$"
## import interesting modules:
import os
import re
from invenio.config import \
    CFG_ACCESS_CONTROL_LEVEL_SITE, \
    CFG_SITE_ADMIN_EMAIL, \
    CFG_SITE_LANG, \
    CFG_SITE_NAME, \
    CFG_SITE_URL, \
    CFG_PYLIBDIR, \
    CFG_WEBSUBMIT_STORAGEDIR, \
    CFG_SITE_SUPPORT_EMAIL, \
    CFG_SITE_SECURE_URL, \
    CFG_SITE_RECORD
from invenio.dbquery import run_sql, Error, OperationalError
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_admin import acc_get_role_users, acc_get_role_id
from invenio.webpage import page, error_page
from invenio.webuser import getUid, get_email, page_not_authorized, collect_user_info
from invenio.base.i18n import gettext_set_language, wash_language
#from invenio.legacy.websubmit.config import *
from invenio.search_engine import search_pattern, check_user_can_view_record
from invenio.search_engine_utils import get_fieldvalues
from invenio.legacy.websubmit.functions.Retrieve_Data import Get_Field
from invenio.ext.email import send_email
from invenio.utils.url import wash_url_argument
from invenio.webgroup_dblayer import get_group_infos, insert_new_group, insert_new_member, delete_member
from invenio.webaccessadmin_lib import cleanstring_email
from invenio.access_control_config import MAXSELECTUSERS
from invenio.access_control_admin import acc_get_user_email
from invenio.access_control_engine import acc_get_authorized_emails
from invenio.webmessage import perform_request_send
import invenio.webbasket_dblayer as basketdb
from invenio.webbasket_config import CFG_WEBBASKET_SHARE_LEVELS, CFG_WEBBASKET_CATEGORIES, CFG_WEBBASKET_SHARE_LEVELS_ORDERED
# BUGFIX: resolved leftover unresolved diff markers; register_exception
# moved from invenio.errorlib to invenio.ext.logging in this patch.
from invenio.ext.logging import register_exception
from invenio.legacy.bibrecord import create_records, record_get_field_value, record_get_field_values
execfile("%s/invenio/websubmit_functions/Retrieve_Data.py" % CFG_PYLIBDIR)
import invenio.template
websubmit_templates = invenio.template.load('websubmit')
CFG_WEBSUBMIT_PENDING_DIR = "%s/pending" % CFG_WEBSUBMIT_STORAGEDIR
CFG_WEBSUBMIT_DUMMY_MARC_XML_REC = "dummy_marcxml_rec"
CFG_WEBSUBMIT_MARC_XML_REC = "recmysql"
def perform_request_save_comment(*dummy_args, **dummy_kwargs):
    """
    FIXME: this function is a dummy workaround for the obsoleted
    function calls below. Should get deleted at the same time as
    them.  Accepts (and ignores) any arguments; always returns None.
    """
    return None
def index(req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG, doctype="", categ="", RN="", send="", flow="", apptype="", action="", email_user_pattern="", id_user="", id_user_remove="", validate="", id_user_val="", msg_subject="", msg_body="", reply="", commentId=""):
    """Main publiline web entry point.

    Dispatches to the simple or the complex ("cplx") approval workflow:
    each still-empty argument (doctype, categ/apptype, RN, action) selects
    the next narrower selection screen; once all are given the document
    page (or, for cplx, the requested action) is rendered.
    Returns a full HTML page, or an error/not-authorized page.
    """
    # Sanitize every user-supplied argument before use.
    ln = wash_language(ln)
    categ = wash_url_argument(categ, 'str')
    RN = wash_url_argument(RN, 'str')
    send = wash_url_argument(send, 'str')
    flow = wash_url_argument(flow, 'str')
    apptype = wash_url_argument(apptype, 'str')
    action = wash_url_argument(action, 'str')
    email_user_pattern = wash_url_argument(email_user_pattern, 'str')
    id_user = wash_url_argument(id_user, 'int')
    id_user_remove = wash_url_argument(id_user_remove, 'int')
    validate = wash_url_argument(validate, 'str')
    id_user_val = wash_url_argument(id_user_val, 'int')
    msg_subject = wash_url_argument(msg_subject, 'str')
    msg_body = wash_url_argument(msg_body, 'str')
    reply = wash_url_argument(reply, 'str')
    commentId = wash_url_argument(commentId, 'str')
    # load the right message language
    _ = gettext_set_language(ln)
    t = ""
    # get user ID:
    try:
        uid = getUid(req)
        if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
            return page_not_authorized(req, "../publiline.py/index",
                                       navmenuid='yourapprovals')
        uid_email = get_email(uid)
    except Error, e:
        return error_page(str(e), req, ln=ln)
    if flow == "cplx":
        # Complex (editorial board) workflow: drill down until all of
        # doctype, categ+apptype, RN and action are provided.
        if doctype == "":
            t = selectCplxDoctype(ln)
        elif (categ == "") or (apptype == ""):
            t = selectCplxCateg(doctype, ln)
        elif RN == "":
            t = selectCplxDocument(doctype, categ, apptype, ln)
        elif action == "":
            t = __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        else:
            t = __doCplxAction(req, doctype, categ, RN, apptype, action, email_user_pattern, id_user, id_user_remove, validate, id_user_val, msg_subject, msg_body, reply, commentId, ln)
        return page(title=_("Document Approval Workflow"),
                    navtrail="""<a class="navtrail" href="%(sitesecureurl)s/youraccount/display">%(account)s</a>""" % {
                        'sitesecureurl': CFG_SITE_SECURE_URL,
                        'account': _("Your Account"),
                    },
                    body=t,
                    description="",
                    keywords="",
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='yourapprovals')
    else:
        # Simple approval/refereeing workflow.
        if doctype == "":
            t = selectDoctype(ln)
        elif categ == "":
            t = selectCateg(doctype, ln)
        elif RN == "":
            t = selectDocument(doctype, categ, ln)
        else:
            t = __displayDocument(req, doctype, categ, RN, send, ln)
        return page(title=_("Approval and Refereeing Workflow"),
                    navtrail="""<a class="navtrail" href="%(sitesecureurl)s/youraccount/display">%(account)s</a>""" % {
                        'sitesecureurl': CFG_SITE_SECURE_URL,
                        'account': _("Your Account"),
                    },
                    body=t,
                    description="",
                    keywords="",
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='yourapprovals')
def selectDoctype(ln=CFG_SITE_LANG):
    """Render the list of document types with pending/past approval requests."""
    rows = run_sql("select DISTINCT doctype from sbmAPPROVAL")
    docs = [{'doctype': row[0],
             'docname': run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (row[0],))[0][0]}
            for row in rows]
    return websubmit_templates.tmpl_publiline_selectdoctype(ln=ln, docs=docs)
def selectCplxDoctype(ln=CFG_SITE_LANG):
    """Render the list of document types with complex-approval requests."""
    rows = run_sql("select DISTINCT doctype from sbmCPLXAPPROVAL")
    docs = [{'doctype': row[0],
             'docname': run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (row[0],))[0][0]}
            for row in rows]
    return websubmit_templates.tmpl_publiline_selectcplxdoctype(ln=ln, docs=docs)
def selectCateg(doctype, ln=CFG_SITE_LANG):
    """Render per-category approval counters for *doctype*.

    Falls through to selectDocument() with the "unknown" category when
    the doctype has no categories at all.
    """
    title = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,))[0][0]
    rows = run_sql("select * from sbmCATEGORIES where doctype=%s order by lname", (doctype,))
    if not rows:
        return selectDocument(doctype, "unknown", ln=ln)
    categories = []
    for row in rows:
        categ_id = row[1]
        waiting = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='waiting'", (doctype, categ_id,))[0][0]
        approved = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='approved'", (doctype, categ_id,))[0][0]
        rejected = run_sql("select COUNT(*) from sbmAPPROVAL where doctype=%s and categ=%s and status='rejected'", (doctype, categ_id,))[0][0]
        categories.append({
            'waiting': waiting,
            'approved': approved,
            'rejected': rejected,
            'id': categ_id,
        })
    return websubmit_templates.tmpl_publiline_selectcateg(
        ln=ln,
        categories=categories,
        doctype=doctype,
        title=title,
    )
def selectCplxCateg(doctype, ln=CFG_SITE_LANG):
    """Render per-category counters for each complex approval type.

    Builds a mapping approval-type -> list of category status counts and
    falls through to selectCplxDocument() when the doctype has no
    categories.
    """
    title = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s", (doctype,))[0][0]
    rows = run_sql("SELECT * FROM sbmCATEGORIES WHERE doctype=%s ORDER BY lname", (doctype,))
    if not rows:
        return selectCplxDocument(doctype, "unknown", "", ln=ln)
    types = {}
    for apptype in ('RRP', 'RPB', 'RDA'):
        entries = []
        for row in rows:
            entry = {'id': row[1], 'desc': row[2]}
            for status in ('waiting', 'rejected', 'approved', 'cancelled'):
                entry[status] = __db_count_doc(doctype, row[1], status, apptype)
            entries.append(entry)
        types[apptype] = entries
    return websubmit_templates.tmpl_publiline_selectcplxcateg(
        ln=ln,
        types=types,
        doctype=doctype,
        title=title,
    )
def selectDocument(doctype, categ, ln=CFG_SITE_LANG):
    """List all approval requests (report number + status) for a
    doctype/category pair.

    :param doctype: short document-type name (sbmDOCTYPE.sdocname)
    :param categ: category short name; falls back to "unknown" when empty
    :return: rendered HTML fragment
    """
    res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,))
    title = res[0][0]
    if categ == "":
        # BUGFIX: was `categ == "unknown"`, a no-op comparison statement;
        # assign the fallback category as __displayDocument() does.
        categ = "unknown"
    docs = []
    sth = run_sql("select rn,status from sbmAPPROVAL where doctype=%s and categ=%s order by status DESC,rn DESC", (doctype, categ))
    for arr in sth:
        docs.append({
            'RN': arr[0],
            'status': arr[1],
        })
    return websubmit_templates.tmpl_publiline_selectdocument(
        ln=ln,
        doctype=doctype,
        title=title,
        categ=categ,
        docs=docs,
    )
def selectCplxDocument(doctype, categ, apptype, ln=CFG_SITE_LANG):
    """List all complex-approval requests for a doctype/category/apptype."""
    title = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,))[0][0]
    name_rows = run_sql("select lname from sbmCATEGORIES where doctype=%s and sname=%s order by lname", (doctype, categ,))
    if name_rows:
        categname = name_rows[0][0]
    else:
        categname = "Unknown"
    docs = [{'RN': row[0], 'status': row[1]}
            for row in run_sql("select rn,status from sbmCPLXAPPROVAL where doctype=%s and categ=%s and type=%s order by status DESC,rn DESC", (doctype, categ, apptype))]
    return websubmit_templates.tmpl_publiline_selectcplxdocument(
        ln=ln,
        doctype=doctype,
        title=title,
        categ=categ,
        categname=categname,
        docs=docs,
        apptype=apptype,
    )
def __displayDocument(req, doctype, categ, RN, send, ln=CFG_SITE_LANG):
    """Render the simple-workflow approval page for one document.

    Looks up the approval record by report number *RN*, fetches the item
    details (from the pending store or the repository), optionally
    re-sends the approval request mail when *send* equals the localized
    "Send Again" label, and renders the document/approval status page.
    Returns an HTML fragment, or a plain error message string.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    t = ""
    res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,))
    docname = res[0][0]
    if categ == "":
        categ = "unknown"
    sth = run_sql("select rn,status,dFirstReq,dLastReq,dAction,access,note from sbmAPPROVAL where rn=%s", (RN,))
    if len(sth) > 0:
        arr = sth[0]
        rn = arr[0]
        status = arr[1]
        dFirstReq = arr[2]
        dLastReq = arr[3]
        dAction = arr[4]
        access = arr[5]
        note = arr[6]
    else:
        return _("Approval has never been requested for this document.") + "<br />&nbsp;"
    ## Get the details of the pending item:
    item_details = get_pending_item_details(doctype, RN)
    ## get_pending_item_details has returned either None or a dictionary
    ## with the following structure:
    ##   { 'title'         : '-', ## String - the item's title
    ##     'recid'         : '',  ## String - recid
    ##     'report-number' : '',  ## String - the item's report number
    ##     'authors'       : [],  ## List   - the item's authors
    ##   }
    if item_details is not None:
        authors = ", ".join(item_details['authors'])
        newrn = item_details['report-number']
        title = item_details['title']
        sysno = item_details['recid']
    else:
        # Was not found in the pending directory. Already approved?
        try:
            (authors, title, sysno) = getInfo(RN)
            newrn = RN
            if sysno is None:
                return _("Unable to display document.")
        except:
            return _("Unable to display document.")
    user_info = collect_user_info(req)
    can_view_record_p, msg = check_user_can_view_record(user_info, sysno)
    if can_view_record_p != 0:
        # User is not allowed to see this record; msg explains why.
        return msg
    confirm_send = 0
    if send == _("Send Again"):
        if authors == "unknown" or title == "unknown":
            SendWarning(doctype, categ, RN, title, authors, access)
        else:
            # @todo - send in different languages
            #SendEnglish(doctype, categ, RN, title, authors, access, sysno)
            send_approval(doctype, categ, RN, title, authors, access, sysno)
        # NOTE(review): nesting of the two lines below (inside `if send`,
        # after the inner if/else) assumed from workflow logic -- the
        # scraped source lost its indentation.
        run_sql("update sbmAPPROVAL set dLastReq=NOW() where rn=%s", (RN,))
        confirm_send = 1
    if status == "waiting":
        if categ == "unknown":
            ## FIXME: This was necessary for document types without categories,
            ## such as DEMOBOO:
            categ = "*"
        (auth_code, auth_message) = acc_authorize_action(req, "referee", verbose=0, doctype=doctype, categ=categ)
    else:
        (auth_code, auth_message) = (None, None)
    t = websubmit_templates.tmpl_publiline_displaydoc(
        ln=ln,
        docname=docname,
        doctype=doctype,
        categ=categ,
        rn=rn,
        status=status,
        dFirstReq=dFirstReq,
        dLastReq=dLastReq,
        dAction=dAction,
        access=access,
        confirm_send=confirm_send,
        auth_code=auth_code,
        auth_message=auth_message,
        authors=authors,
        title=title,
        sysno=sysno,
        newrn=newrn,
        note=note,
    )
    return t
def __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln=CFG_SITE_LANG):
    """Render the complex-workflow approval page for one document.

    Loads the complex approval record, determines which workflow roles the
    current user holds (publication committee, editorial board, referee,
    project leader, author), and renders the document page plus -- when a
    discussion basket is attached -- its comment thread and comment form.
    Returns an HTML fragment, or a plain error message string.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    t = ""
    uid = getUid(req)
    res = run_sql("select ldocname from sbmDOCTYPE where sdocname=%s", (doctype,))
    docname = res[0][0]
    if categ == "":
        categ = "unknown"
    key = (RN, apptype)
    infos = __db_get_infos(key)
    if len(infos) > 0:
        (status, id_group, id_bskBASKET, id_EdBoardGroup,
         dFirstReq, dLastReq, dEdBoardSel, dRefereeSel, dRefereeRecom, dEdBoardRecom, dPubComRecom, dProjectLeaderAction) = infos[0]
        dates = {'dFirstReq': dFirstReq,
                 'dLastReq': dLastReq,
                 'dEdBoardSel': dEdBoardSel,
                 'dRefereeSel': dRefereeSel,
                 'dRefereeRecom': dRefereeRecom,
                 'dEdBoardRecom': dEdBoardRecom,
                 'dPubComRecom': dPubComRecom,
                 'dProjectLeaderAction': dProjectLeaderAction,
                 }
    else:
        return _("Approval has never been requested for this document.") + "<br />&nbsp;"
    ## Removing call to deprecated "getInAlice" function and replacing it with
    ## a call to the newer "get_brief_doc_details_from_repository" function:
    ## try:
    ##     (authors, title, sysno, newrn) = getInAlice(doctype, categ, RN)
    ## except TypeError:
    ##     return _("Unable to display document.")
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'         : '-', ## String - the item's title
    ##     'recid'         : '',  ## String - recid
    ##     'report-number' : '',  ## String - the item's report number
    ##     'authors'       : [],  ## List   - the item's authors
    ##   }
    if item_details is not None:
        ## Details of the item were found in the Invenio repository
        authors = ", ".join(item_details['authors'])
        newrn = item_details['report-number']
        title = item_details['title']
        sysno = item_details['recid']
    else:
        ## Can't find any document details.
        return _("Unable to display document.")
    if status == "waiting":
        # Role checks are only relevant while the approval is pending.
        isPubCom = __is_PubCom(req, doctype)
        isEdBoard = __is_EdBoard(uid, id_EdBoardGroup)
        isReferee = __is_Referee(uid, id_bskBASKET)
        isProjectLeader = __is_ProjectLeader(req, doctype, categ)
        isAuthor = __is_Author(uid, sysno)
    else:
        isPubCom = None
        isEdBoard = None
        isReferee = None
        isProjectLeader = None
        isAuthor = None
    user_info = collect_user_info(req)
    can_view_record_p, msg = check_user_can_view_record(user_info, sysno)
    if can_view_record_p != 0:
        return msg
    t += websubmit_templates.tmpl_publiline_displaycplxdoc(
        ln=ln,
        docname=docname,
        doctype=doctype,
        categ=categ,
        rn=RN,
        apptype=apptype,
        status=status,
        dates=dates,
        isPubCom=isPubCom,
        isEdBoard=isEdBoard,
        isReferee=isReferee,
        isProjectLeader=isProjectLeader,
        isAuthor=isAuthor,
        authors=authors,
        title=title,
        sysno=sysno,
        newrn=newrn,
    )
    if id_bskBASKET > 0:
        # A discussion basket is attached: show its comment thread if the
        # user has at least read access to items.
        rights = basketdb.get_max_user_rights_on_basket(uid, id_bskBASKET)
        if not(__check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READITM'])):
            return t
        # FIXME This error will be fixed with Sam's new version of publiline.
        # pylint: disable=E1101
        comments = basketdb.get_comments(id_bskBASKET, sysno)
        # pylint: enable=E1101
        if dProjectLeaderAction != None:
            # Workflow finished: discussion is closed for new comments.
            user_can_add_comment = 0
        else:
            user_can_add_comment = __check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
        comment_subject = ""
        comment_body = ""
        if reply == "true":
            #Get the message subject and body from the comment
            for comment in comments:
                if str(commentId) == str(comment[0]):
                    comment_subject = comment[2]
                    comment_body = comment[3]
            # NOTE(review): lstrip("Re: ") strips a *character set*, not the
            # literal prefix; kept as in the original code.
            comment_subject = comment_subject.lstrip("Re: ")
            comment_subject = "Re: " + comment_subject
            comment_body = "> " + comment_body.replace("\n", "\n> ")
        t += websubmit_templates.tmpl_publiline_displaycplxdocitem(
            doctype, categ, RN, apptype, "AddComment",
            comments,
            (__check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['READCMT']),
             user_can_add_comment,
             __check_basket_sufficient_rights(rights, CFG_WEBBASKET_SHARE_LEVELS['DELCMT'])),
            selected_category=CFG_WEBBASKET_CATEGORIES['GROUP'], selected_topic=0, selected_group_id=id_group,
            comment_subject=comment_subject, comment_body=comment_body, ln=ln)
    return t
def __check_basket_sufficient_rights(rights_user_has, rights_needed):
    """Return whether rights_user_has is at least as high as rights_needed.

    Both levels are compared by their position in the ordered list of
    basket share levels; an unrecognised level yields a false (0) result.
    """
    levels = CFG_WEBBASKET_SHARE_LEVELS_ORDERED
    try:
        rank_has = levels.index(rights_user_has)
        rank_needed = levels.index(rights_needed)
    except ValueError:
        # One of the two values is not a known share level.
        return 0
    return rank_has >= rank_needed
def __is_PubCom (req, doctype):
    """Return the authorization code (0 = allowed) for the publication
    committee chair role on the given document type."""
    auth_code, dummy_message = acc_authorize_action(req, "pubcomchair", verbose=0, doctype=doctype)
    return auth_code
def __is_EdBoard (uid, id_EdBoardGroup):
    """Check whether a user belongs to the editorial board group.

    @param uid: (int) - id of the user to look for.
    @param id_EdBoardGroup: (int) - usergroup id of the editorial board;
        values <= 0 mean that no board exists yet.
    @return: 0 when the user is an active member of the board, else None.
    """
    isEdBoard = None
    if id_EdBoardGroup > 0:
        ## Use a bound query parameter: the group id was previously
        ## interpolated with Python string formatting, which is an
        ## SQL-injection risk and bypasses the driver's escaping.
        edBoard = run_sql("""SELECT u.id
                               FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user
                              WHERE ug.id_usergroup = %s and user_status != 'A' AND user_status != 'P'""",
                          (id_EdBoardGroup, ))
        for uid_scan in edBoard:
            if uid == uid_scan[0]:
                isEdBoard = 0
                break
    return isEdBoard
def __is_Referee (uid, id_bskBASKET):
    """Return 0 when uid owns the referee basket, None otherwise."""
    if id_bskBASKET > 0 and basketdb.check_user_owns_baskets (uid, id_bskBASKET) == 1:
        return 0
    return None
def __is_ProjectLeader (req, doctype, categ):
    """Return the authorization code (0 = allowed) for the project leader
    role on the given document type and category."""
    auth_code, dummy_message = acc_authorize_action(req, "projectleader", verbose=0, doctype=doctype, categ=categ)
    return auth_code
def __is_Author (uid, sysno):
    """Check whether the logged-in user appears to be the submitter of a
    record, by comparing the user's email with the record's submitter
    email field (MARC 8560_f).

    @param uid: (int) - id of the logged-in user.
    @param sysno: record id of the document.
    @return: 0 when the user's address occurs in the submitter field,
        else None.
    """
    email = Get_Field("8560_f", sysno)
    email = re.sub("[\n\r ]+", "", email)
    uid_email = re.sub("[\n\r ]+", "", acc_get_user_email(uid))
    isAuthor = None
    ## Escape the address before searching: it is data, not a pattern.
    ## Unescaped, addresses containing regex metacharacters (e.g. "+"
    ## or ".") could mis-match or raise an re error.
    if uid_email != "" and re.search(re.escape(uid_email), email, re.IGNORECASE) is not None:
        isAuthor = 0
    return isAuthor
def __db_count_doc (doctype, categ, status, apptype):
    """Return the number of complex-approval rows matching all criteria."""
    query = "SELECT COUNT(*) FROM sbmCPLXAPPROVAL WHERE doctype=%s AND categ=%s AND status=%s AND type=%s"
    res = run_sql(query, (doctype, categ, status, apptype,))
    return res[0][0]
def __db_get_infos (key):
    """Fetch the full approval-state row(s) for key = (rn, type)."""
    query = "SELECT status,id_group,id_bskBASKET,id_EdBoardGroup,dFirstReq,dLastReq,dEdBoardSel,dRefereeSel,dRefereeRecom,dEdBoardRecom,dPubComRecom,dProjectLeaderAction FROM sbmCPLXAPPROVAL WHERE rn=%s and type=%s"
    return run_sql(query, key)
def __db_set_EdBoardSel_time (key):
    """Stamp the editorial-board selection time (now) for key = (rn, type)."""
    query = "UPDATE sbmCPLXAPPROVAL SET dEdBoardSel=NOW() WHERE rn=%s and type=%s"
    run_sql(query, key)
def __db_check_EdBoardGroup (key, id_EdBoardGroup, uid, group_descr):
    """Ensure an editorial-board usergroup exists for this approval.

    If the group id no longer refers to an existing group, a new "VM"
    (visible, membership-by-invitation) group is created and its id is
    stored in the approval row.

    @param key: (tuple) - (RN, apptype) identifying the approval row.
    @param id_EdBoardGroup: (int) - the currently recorded group id.
    @param uid: (int) - user who becomes owner of a newly created group.
    @param group_descr: (string) - description template, formatted with RN.
    @return: (int) - the existing or newly created group id.
    """
    ## Unpack inside the body: tuple parameters in the signature are
    ## Python-2-only syntax (removed by PEP 3113); callers still pass
    ## the same (RN, apptype) tuple.
    (RN, apptype) = key
    res = get_group_infos (id_EdBoardGroup)
    if len(res) == 0:
        id_EdBoardGroup = insert_new_group (uid, RN, group_descr % RN, "VM")
        run_sql("UPDATE sbmCPLXAPPROVAL SET id_EdBoardGroup=%s WHERE rn=%s and type=%s", (id_EdBoardGroup, RN, apptype,))
    return id_EdBoardGroup
def __db_set_basket (key, id_bsk):
    """Attach basket id_bsk to the approval identified by key = (RN,
    apptype) and stamp the referee selection time (now).

    @param key: (tuple) - (RN, apptype) identifying the approval row.
    @param id_bsk: (int) - id of the basket to attach.
    """
    ## Unpack inside the body: tuple parameters in the signature are
    ## Python-2-only syntax (removed by PEP 3113).
    (RN, apptype) = key
    run_sql("UPDATE sbmCPLXAPPROVAL SET id_bskBASKET=%s, dRefereeSel=NOW() WHERE rn=%s and type=%s", (id_bsk, RN, apptype,))
def __db_set_RefereeRecom_time (key):
    """Stamp the referee recommendation time (now) for key = (rn, type)."""
    query = "UPDATE sbmCPLXAPPROVAL SET dRefereeRecom=NOW() WHERE rn=%s and type=%s"
    run_sql(query, key)
def __db_set_EdBoardRecom_time (key):
    """Stamp the editorial-board recommendation time (now) for key = (rn, type)."""
    query = "UPDATE sbmCPLXAPPROVAL SET dEdBoardRecom=NOW() WHERE rn=%s and type=%s"
    run_sql(query, key)
def __db_set_PubComRecom_time (key):
    """Stamp the publication-committee recommendation time (now) for key = (rn, type)."""
    query = "UPDATE sbmCPLXAPPROVAL SET dPubComRecom=NOW() WHERE rn=%s and type=%s"
    run_sql(query, key)
def __db_set_status (key, status):
    """Set the approval status for key = (RN, apptype) and stamp the
    project-leader action time (now).

    @param key: (tuple) - (RN, apptype) identifying the approval row.
    @param status: (string) - new status ('approved', 'rejected', 'cancelled', ...).
    """
    ## Unpack inside the body: tuple parameters in the signature are
    ## Python-2-only syntax (removed by PEP 3113).
    (RN, apptype) = key
    run_sql("UPDATE sbmCPLXAPPROVAL SET status=%s, dProjectLeaderAction=NOW() WHERE rn=%s and type=%s", (status, RN, apptype,))
def __doCplxAction(req, doctype, categ, RN, apptype, action, email_user_pattern, id_user, id_user_remove, validate, id_user_val, msg_subject, msg_body, reply, commentId, ln=CFG_SITE_LANG):
    """
    Perform one step of a complex approval workflow and return the HTML
    body of the resulting page.

    Note: all arguments are supposed to have been washed already.

    @param req: the request object.
    @param doctype: (string) - the document type.
    @param categ: (string) - the document category ("unknown" when empty).
    @param RN: (string) - the report number of the document.
    @param apptype: (string) - the approval type ("RRP", "RPB" or "RDA").
    @param action: (string) - the workflow action: one of "EdBoardSel",
        "RefereeSel", "AddAuthorList", "AddComment", "RefereeRecom",
        "EdBoardRecom", "PubComRecom", "ProjectLeaderDecision" or
        "AuthorCancel".
    @param email_user_pattern: (string) - pattern used to search for users.
    @param id_user: id of the user selected for addition.
    @param id_user_remove: id of the user selected for removal.
    @param validate: (string) - confirmation flag ("go", "approve" or
        "reject") that triggers the actual state transition.
    @param id_user_val: id of the user validated as referee.
    @param msg_subject: (string) - subject of the message/comment to send.
    @param msg_body: (string) - body of the message/comment to send.
    @param reply: (string) - "true" when replying to an existing comment.
    @param commentId: id of the comment being replied to.
    @param ln: (string) - the interface language.
    @return: (string) - the HTML body of the page, or an error message.
    """
    # load the right message language
    _ = gettext_set_language(ln)
    ## Fixed English texts used for basket descriptions and notification
    ## subjects/bodies; TEXT_RSN_* serve "RRP" approvals, TEXT_RPB_* serve
    ## "RPB" approvals.
    TEXT_RSN_RefereeSel_BASKET_DESCR = "Requests for refereeing process"
    TEXT_RSN_RefereeSel_MSG_REFEREE_SUBJECT = "Referee selection"
    TEXT_RSN_RefereeSel_MSG_REFEREE_BODY = "You have been named as a referee for this document :"
    TEXT_RSN_RefereeSel_MSG_GROUP_SUBJECT = "Please, review this publication"
    TEXT_RSN_RefereeSel_MSG_GROUP_BODY = "Please, review the following publication"
    TEXT_RSN_RefereeRecom_MSG_PUBCOM_SUBJECT = "Final recommendation from the referee"
    TEXT_RSN_PubComRecom_MSG_PRJLEADER_SUBJECT = "Final recommendation from the publication board : "
    TEXT_RSN_ProjectLeaderDecision_MSG_SUBJECT = "Final decision from the project leader"
    TEXT_RPB_EdBoardSel_MSG_EDBOARD_SUBJECT = "You have been selected in a editorial board"
    TEXT_RPB_EdBoardSel_MSG_EDBOARD_BODY = "You have been selected as a member of the editorial board of this document :"
    TEXT_RPB_EdBoardSel_EDBOARD_GROUP_DESCR = "Editorial board for %s"
    TEXT_RPB_RefereeSel_BASKET_DESCR = "Requests for publication"
    TEXT_RPB_RefereeSel_MSG_REFEREE_SUBJECT = "Referee selection"
    TEXT_RPB_RefereeSel_MSG_REFEREE_BODY = "You have been named as a referee for this document :"
    TEXT_RPB_RefereeSel_MSG_GROUP_SUBJECT = "Please, review this publication"
    TEXT_RPB_RefereeSel_MSG_GROUP_BODY = "Please, review the following publication"
    TEXT_RPB_RefereeRecom_MSG_EDBOARD_SUBJECT = "Final recommendation from the referee"
    TEXT_RPB_EdBoardRecom_MSG_PUBCOM_SUBJECT = "Final recommendation from the editorial board"
    TEXT_RPB_PubComRecom_MSG_PRJLEADER_SUBJECT = "Final recommendation from the publication board"
    TEXT_RPB_ProjectLeaderDecision_MSG_SUBJECT = "Final decision from the project leader"
    t = ""
    uid = getUid(req)
    if categ == "":
        categ = "unknown"
    ## (RN, apptype) identifies the approval row in sbmCPLXAPPROVAL.
    key = (RN, apptype)
    infos = __db_get_infos (key)
    if len(infos) > 0:
        (status, id_group, id_bskBASKET, id_EdBoardGroup, dummy, dummy,
         dEdBoardSel, dRefereeSel, dRefereeRecom, dEdBoardRecom, dPubComRecom, dProjectLeaderAction) = infos[0]
    else:
        return _("Approval has never been requested for this document.") + "<br />&nbsp;"
    ## Removing call to deprecated "getInAlice" function and replacing it with
    ## a call to the newer "get_brief_doc_details_from_repository" function:
    ## try:
    ##     (authors, title, sysno, newrn) = getInAlice(doctype, categ, RN)
    ## except TypeError:
    ##     return _("Unable to display document.")
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'         : '-', ## String - the item's title
    ##     'recid'         : '',  ## String - recid
    ##     'report-number' : '',  ## String - the item's report number
    ##     'authors'       : [],  ## List   - the item's authors
    ##   }
    if item_details is not None:
        ## Details of the item were found in the Invenio repository
        authors = ", ".join(item_details['authors'])
        newrn = item_details['report-number']
        title = item_details['title']
        sysno = item_details['recid']
    else:
        ## Can't find any document details.
        return _("Unable to display document.")
    if (action == "EdBoardSel") and (apptype == "RPB"):
        ## --- Manage the editorial board membership (RPB only) ---
        if __is_PubCom (req, doctype) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        if validate == "go":
            ## Board selection confirmed: stamp it (once) and notify the board.
            if dEdBoardSel == None:
                __db_set_EdBoardSel_time (key)
                perform_request_send (uid, "", RN, TEXT_RPB_EdBoardSel_MSG_EDBOARD_SUBJECT, TEXT_RPB_EdBoardSel_MSG_EDBOARD_BODY)
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        id_EdBoardGroup = __db_check_EdBoardGroup (key, id_EdBoardGroup, uid, TEXT_RPB_EdBoardSel_EDBOARD_GROUP_DESCR)
        subtitle1 = _('Adding users to the editorial board')
        # remove letters not allowed in an email
        email_user_pattern = cleanstring_email(email_user_pattern)
        stopon1 = ""
        stopon2 = ""
        stopon3 = ""
        users = []
        extrausers = []
        # pattern is entered
        if email_user_pattern:
            # users with matching email-address
            try:
                users1 = run_sql("""SELECT id, email FROM user WHERE email<>'' AND email RLIKE %s ORDER BY email """, (email_user_pattern, ))
            except OperationalError:
                users1 = ()
            # users that are connected
            try:
                users2 = run_sql("""SELECT DISTINCT u.id, u.email
                                    FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user
                                    WHERE u.email<>'' AND ug.id_usergroup = %s AND u.email RLIKE %s
                                    ORDER BY u.email """, (id_EdBoardGroup, email_user_pattern))
            except OperationalError:
                users2 = ()
            # no users that match the pattern
            if not (users1 or users2):
                stopon1 = '<p>%s</p>' % _("no qualified users, try new search.")
            elif len(users1) > MAXSELECTUSERS:
                stopon1 = '<p><strong>%s %s</strong>, %s (%s %s)</p>' % (len(users1), _("hits"), _("too many qualified users, specify more narrow search."), _("limit"), MAXSELECTUSERS)
            # show matching users
            else:
                users = []
                extrausers = []
                for (user_id, email) in users1:
                    if (user_id, email) not in users2: users.append([user_id, email,''])
                for (user_id, email) in users2:
                    ## Negative id marks users already in the board group.
                    extrausers.append([-user_id, email,''])
        try: id_user = int(id_user)
        except ValueError: pass
        # user selected already connected to role
        email_out = acc_get_user_email(id_user)
        if id_user < 0:
            stopon2 = '<p>%s</p>' % _("users in brackets are already attached to the role, try another one...")
        # a user is selected
        elif email_out:
            result = insert_new_member(id_user, id_EdBoardGroup, "M")
            stopon2 = '<p>confirm: user <strong>%s</strong> added to the editorial board.</p>' % (email_out, )
        subtitle2 = _('Removing users from the editorial board')
        usersremove = run_sql("""SELECT DISTINCT u.id, u.email
                                 FROM user u LEFT JOIN user_usergroup ug ON u.id = ug.id_user
                                 WHERE u.email <> "" AND ug.id_usergroup = %s and user_status != 'A' AND user_status != 'P'
                                 ORDER BY u.email """, (id_EdBoardGroup, ))
        try: id_user_remove = int(id_user_remove)
        except ValueError: pass
        # user selected already connected to role
        email_out = acc_get_user_email(id_user_remove)
        # a user is selected
        if email_out:
            result = delete_member(id_EdBoardGroup, id_user_remove)
            stopon3 = '<p>confirm: user <strong>%s</strong> removed from the editorial board.</p>' % (email_out, )
        t = websubmit_templates.tmpl_publiline_displaydocplxaction (
              ln = ln,
              doctype = doctype,
              categ = categ,
              rn = RN,
              apptype = apptype,
              action = action,
              status = status,
              authors = authors,
              title = title,
              sysno = sysno,
              subtitle1 = subtitle1,
              email_user_pattern = email_user_pattern,
              stopon1 = stopon1,
              users = users,
              extrausers = extrausers,
              stopon2 = stopon2,
              subtitle2 = subtitle2,
              usersremove = usersremove,
              stopon3 = stopon3,
              validate_btn = _("Validate the editorial board selection"),
            )
        return t
    elif (action == "RefereeSel") and ((apptype == "RRP") or (apptype == "RPB")):
        ## --- Select the referee for the document ---
        if apptype == "RRP":
            to_check = __is_PubCom (req, doctype)
            TEXT_RefereeSel_BASKET_DESCR = TEXT_RSN_RefereeSel_BASKET_DESCR
            TEXT_RefereeSel_MSG_REFEREE_SUBJECT = TEXT_RSN_RefereeSel_MSG_REFEREE_SUBJECT
            TEXT_RefereeSel_MSG_REFEREE_BODY = TEXT_RSN_RefereeSel_MSG_REFEREE_BODY + " " + "\"" + item_details['title'] + "\""
            TEXT_RefereeSel_MSG_GROUP_SUBJECT = TEXT_RSN_RefereeSel_MSG_GROUP_SUBJECT
            TEXT_RefereeSel_MSG_GROUP_BODY = TEXT_RSN_RefereeSel_MSG_GROUP_BODY + " " + "\"" + item_details['title'] + "\""
        elif apptype == "RPB":
            to_check = __is_EdBoard (uid, id_EdBoardGroup)
            ## NOTE(review): the RSN_* texts are reused here although RPB_*
            ## equivalents exist — looks intentional in the original; confirm.
            TEXT_RefereeSel_BASKET_DESCR = TEXT_RSN_RefereeSel_BASKET_DESCR
            TEXT_RefereeSel_MSG_REFEREE_SUBJECT = TEXT_RSN_RefereeSel_MSG_REFEREE_SUBJECT
            TEXT_RefereeSel_MSG_REFEREE_BODY = TEXT_RSN_RefereeSel_MSG_REFEREE_BODY + " " + "\"" + item_details['title'] + "\""
            TEXT_RefereeSel_MSG_GROUP_SUBJECT = TEXT_RSN_RefereeSel_MSG_GROUP_SUBJECT
            TEXT_RefereeSel_MSG_GROUP_BODY = TEXT_RSN_RefereeSel_MSG_GROUP_BODY + " " + "\"" + item_details['title'] + "\""
        else:
            to_check = None
        if to_check != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        if validate == "go":
            ## Referee confirmed: create and share the refereeing basket
            ## (once) and notify referee and group.
            if dRefereeSel == None:
                id_bsk = basketdb.create_basket (int(id_user_val), RN, TEXT_RefereeSel_BASKET_DESCR)
                basketdb.share_basket_with_group (id_bsk, id_group, CFG_WEBBASKET_SHARE_LEVELS['ADDCMT'])
                basketdb.add_to_basket (int(id_user_val), (sysno, ), (id_bsk, ))
                __db_set_basket (key, id_bsk)
                email_address = run_sql("""SELECT email FROM user WHERE id = %s """, (id_user_val, ))[0][0]
                perform_request_send (uid, email_address, "", TEXT_RefereeSel_MSG_REFEREE_SUBJECT, TEXT_RefereeSel_MSG_REFEREE_BODY, 0, 0, 0, ln, 1)
                sendMailToReferee(doctype, categ, RN, email_address, authors)
                group_name = run_sql("""SELECT name FROM usergroup WHERE id = %s""", (id_group, ))[0][0]
                perform_request_send (int(id_user_val), "", group_name, TEXT_RefereeSel_MSG_GROUP_SUBJECT, TEXT_RefereeSel_MSG_GROUP_BODY)
                sendMailToGroup(doctype, categ, RN, id_group, authors)
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        subtitle1 = _('Referee selection')
        # remove letters not allowed in an email
        email_user_pattern = cleanstring_email(email_user_pattern)
        stopon1 = ""
        stopon2 = ""
        users = []
        extrausers = []
        # pattern is entered
        if email_user_pattern:
            # users with matching email-address
            try:
                users1 = run_sql("""SELECT id, email FROM user WHERE email <> "" AND email RLIKE %s ORDER BY email """, (email_user_pattern, ))
            except OperationalError:
                users1 = ()
            # no users that match the pattern
            if not users1:
                stopon1 = '<p>%s</p>' % _("no qualified users, try new search.")
            elif len(users1) > MAXSELECTUSERS:
                stopon1 = '<p><strong>%s %s</strong>, %s (%s %s)</p>' % (len(users1), _("hits"), _("too many qualified users, specify more narrow search."), _("limit"), MAXSELECTUSERS)
            # show matching users
            else:
                users = []
                for (user_id, email) in users1:
                    users.append([user_id, email,''])
        try: id_user = int(id_user)
        except ValueError: pass
        # user selected already connected to role
        email_out = acc_get_user_email(id_user)
        # a user is selected
        if email_out:
            stopon2 = """<p>user <strong>%s</strong> will be the referee ?
                          <input type="hidden" name="id_user_val" value="%s" />
                          <input type="hidden" name="validate" value="go" />
                          <input class="adminbutton" type="submit" value="Validate the referee selection" />
                        </p>""" % (email_out, id_user)
        t = websubmit_templates.tmpl_publiline_displaydocplxaction (
              ln = ln,
              doctype = doctype,
              categ = categ,
              rn = RN,
              apptype = apptype,
              action = action,
              status = status,
              authors = authors,
              title = title,
              sysno = sysno,
              subtitle1 = subtitle1,
              email_user_pattern = email_user_pattern,
              stopon1 = stopon1,
              users = users,
              extrausers = [],
              stopon2 = stopon2,
              subtitle2 = "",
              usersremove = [],
              stopon3 = "",
              validate_btn = "",
            )
        return t
    elif (action == "AddAuthorList") and (apptype == "RPB"):
        ## Not implemented: author-list management for RPB approvals.
        return ""
    elif (action == "AddComment") and ((apptype == "RRP") or (apptype == "RPB")):
        ## --- Save a comment into the refereeing basket ---
        t = ""
        if validate == "go":
            (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln)
            t += "%(infos)s<br /><br />" % {'infos': infos[0]}
        t += """
            <form action="publiline.py">
              <input type="hidden" name="flow" value="cplx" />
              <input type="hidden" name="doctype" value="%(doctype)s" />
              <input type="hidden" name="categ" value="%(categ)s" />
              <input type="hidden" name="RN" value="%(rn)s" />
              <input type="hidden" name="apptype" value="%(apptype)s" />
              <input type="submit" class="formbutton" value="%(button_label)s" />
            </form>""" % {'doctype': doctype,
                          'categ': categ,
                          'rn': RN,
                          'apptype': apptype,
                          'button_label': _("Come back to the document"),
                         }
        return t
    elif (action == "RefereeRecom") and ((apptype == "RRP") or (apptype == "RPB")):
        ## --- The referee records his/her recommendation ---
        if __is_Referee (uid, id_bskBASKET) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        if apptype == "RRP":
            # Build publication committee chair's email address
            user_addr = ""
            # Try to retrieve the publication committee chair's email from the role database
            for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (doctype, categ))):
                user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
            # And if there are general publication committee chair's
            for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_*" % doctype)):
                user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
            user_addr = re.sub(",$", "", user_addr)
            group_addr = ""
            TEXT_RefereeRecom_MSG_SUBJECT = TEXT_RSN_RefereeRecom_MSG_PUBCOM_SUBJECT
        elif apptype == "RPB":
            user_addr = ""
            group_addr = RN
            TEXT_RefereeRecom_MSG_SUBJECT = TEXT_RPB_RefereeRecom_MSG_EDBOARD_SUBJECT
        else:
            user_addr = ""
            group_addr = ""
            TEXT_RefereeRecom_MSG_SUBJECT = ""
        if validate == "approve" or validate == "reject":
            ## Recommendation confirmed: send it (once) and stamp the time.
            if dRefereeRecom == None:
                perform_request_send (uid, user_addr, group_addr, msg_subject, msg_body, 0, 0, 0, ln, 1)
                if validate == "approve":
                    msg_body = "Approved : " + msg_body
                else:
                    msg_body = "Rejected : " + msg_body
                #Get the Project Leader's email address
                # email = ""
                # for user in acc_get_role_users(acc_get_role_id("projectleader_%s_%s" % (doctype, categ))):
                #     email += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
                # sendMailToProjectLeader(doctype, categ, RN, email, authors, "referee", msg_body)
                sendMailtoCommitteeChair(doctype, categ, RN, user_addr, authors)
                __db_set_RefereeRecom_time (key)
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        t = websubmit_templates.tmpl_publiline_displaycplxrecom (
              ln = ln,
              doctype = doctype,
              categ = categ,
              rn = RN,
              apptype = apptype,
              action = action,
              status = status,
              authors = authors,
              title = title,
              sysno = sysno,
              msg_to = user_addr,
              msg_to_group = group_addr,
              msg_subject = TEXT_RefereeRecom_MSG_SUBJECT,
            )
        return t
    elif (action == "EdBoardRecom") and (apptype == "RPB"):
        ## --- The editorial board records its recommendation (RPB only) ---
        if __is_EdBoard (uid, id_EdBoardGroup) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        # Build publication committee chair's email address
        user_addr = ""
        # Try to retrieve the publication committee chair's email from the role database
        ## NOTE(review): these two queries select "nickname" while the other
        ## branches select "email" — possibly a bug; confirm before changing.
        for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_%s" % (doctype, categ))):
            user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
        # And if there are general publication committee chair's
        for user in acc_get_role_users(acc_get_role_id("pubcomchair_%s_*" % doctype)):
            user_addr += run_sql("""SELECT nickname FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
        user_addr = re.sub(",$", "", user_addr)
        if validate == "go":
            if dEdBoardRecom == None:
                perform_request_send (uid, user_addr, "", msg_subject, msg_body)
                __db_set_EdBoardRecom_time (key)
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        t = websubmit_templates.tmpl_publiline_displaycplxrecom (
              ln = ln,
              doctype = doctype,
              categ = categ,
              rn = RN,
              apptype = apptype,
              action = action,
              status = status,
              authors = authors,
              title = title,
              sysno = sysno,
              msg_to = user_addr,
              msg_to_group = "",
              msg_subject = TEXT_RPB_EdBoardRecom_MSG_PUBCOM_SUBJECT,
            )
        return t
    elif (action == "PubComRecom") and ((apptype == "RRP") or (apptype == "RPB")):
        ## --- The publication committee records its recommendation ---
        if __is_PubCom (req, doctype) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        # Build project leader's email address
        user_addr = ""
        # Try to retrieve the project leader's email from the role database
        for user in acc_get_role_users(acc_get_role_id("projectleader_%s_%s" % (doctype, categ))):
            user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
        # And if there are general project leader's
        for user in acc_get_role_users(acc_get_role_id("projectleader_%s_*" % doctype)):
            user_addr += run_sql("""SELECT email FROM user WHERE id = %s """, (user[0], ))[0][0] + ","
        user_addr = re.sub(",$", "", user_addr)
        if apptype == "RRP":
            TEXT_PubComRecom_MSG_SUBJECT = TEXT_RSN_PubComRecom_MSG_PRJLEADER_SUBJECT
        elif apptype == "RPB":
            group_addr = RN
            TEXT_PubComRecom_MSG_SUBJECT = TEXT_RPB_PubComRecom_MSG_PRJLEADER_SUBJECT
        else:
            TEXT_PubComRecom_MSG_SUBJECT = ""
        if validate == "approve" or validate == "reject":
            if validate == "approve":
                msg_body = "Approved : " + msg_body
            else:
                msg_body = "Rejected : " + msg_body
            ## Send the recommendation (once) and stamp the time.
            if dPubComRecom == None:
                perform_request_send (uid, user_addr, "", msg_subject, msg_body, 0, 0, 0, ln, 1)
                sendMailToProjectLeader(doctype, categ, RN, user_addr, authors, "publication committee chair", msg_body)
                __db_set_PubComRecom_time (key)
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        t = websubmit_templates.tmpl_publiline_displaycplxrecom (
              ln = ln,
              doctype = doctype,
              categ = categ,
              rn = RN,
              apptype = apptype,
              action = action,
              status = status,
              authors = authors,
              title = title,
              sysno = sysno,
              msg_to = user_addr,
              msg_to_group = "",
              msg_subject = TEXT_PubComRecom_MSG_SUBJECT + " " + "\"" + item_details['title'] + "\"",
            )
        return t
    elif (action == "ProjectLeaderDecision") and ((apptype == "RRP") or (apptype == "RPB")):
        ## --- The project leader takes the final decision (RRP/RPB) ---
        if __is_ProjectLeader (req, doctype, categ) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        t += """
            <form action="publiline.py">
              <input type="hidden" name="flow" value="cplx" />
              <input type="hidden" name="doctype" value="%(doctype)s" />
              <input type="hidden" name="categ" value="%(categ)s" />
              <input type="hidden" name="RN" value="%(rn)s" />
              <input type="hidden" name="apptype" value="%(apptype)s" />
              <input type="submit" class="formbutton" value="%(button_label)s" />
            </form>""" % {'doctype': doctype,
                          'categ': categ,
                          'rn': RN,
                          'apptype': apptype,
                          'button_label': _("Back to the document"),
                         }
        if validate == "approve":
            if dProjectLeaderAction == None:
                (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln)
                out = "%(infos)s<br /><br />" % {'infos': infos[0]}
                sendMailToSubmitter(doctype, categ, RN, "approved")
                __db_set_status (key, 'approved')
            ## NOTE(review): if the decision was already taken
            ## (dProjectLeaderAction not None), 'out' is undefined here and
            ## this raises NameError — latent bug, preserved as-is.
            return out + t
        elif validate == "reject":
            if dProjectLeaderAction == None:
                (errors, infos) = perform_request_save_comment (uid, id_bskBASKET, sysno, msg_subject, msg_body, ln)
                out = "%(infos)s<br /><br />" % {'infos': infos[0]}
                sendMailToSubmitter(doctype, categ, RN, "rejected")
                __db_set_status (key, 'rejected')
            ## NOTE(review): same undefined-'out' hazard as in the
            ## "approve" branch above.
            return out + t
        validation = """
            <select name="validate">
              <option value="%(select)s"> %(select)s</option>
              <option value="approve">%(approve)s</option>
              <option value="reject">%(reject)s</option>
            </select>
            <input type="submit" class="formbutton" value="%(button_label)s" />""" % {'select': _('Select:'),
                                                                                      'approve': _('Approve'),
                                                                                      'reject': _('Reject'),
                                                                                      'button_label': _('Take a decision'),
                                                                                     }
        if apptype == "RRP":
            TEXT_ProjectLeaderDecision_MSG_SUBJECT = TEXT_RSN_ProjectLeaderDecision_MSG_SUBJECT
        elif apptype == "RPB":
            TEXT_ProjectLeaderDecision_MSG_SUBJECT = TEXT_RPB_ProjectLeaderDecision_MSG_SUBJECT
        else:
            TEXT_ProjectLeaderDecision_MSG_SUBJECT = ""
        t = websubmit_templates.tmpl_publiline_displaywritecomment(doctype, categ, RN, apptype, action, _("Take a decision"), TEXT_ProjectLeaderDecision_MSG_SUBJECT, validation, "", ln)
        return t
    elif (action == "ProjectLeaderDecision") and (apptype == "RDA"):
        ## --- The project leader takes the final decision (RDA) ---
        if __is_ProjectLeader (req, doctype, categ) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if status == "cancelled":
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        if validate == "approve":
            if dProjectLeaderAction == None:
                __db_set_status (key, 'approved')
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        elif validate == "reject":
            if dProjectLeaderAction == None:
                __db_set_status (key, 'rejected')
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        t = """<p>
                 <form action="publiline.py">
                   <input type="hidden" name="flow" value="cplx" />
                   <input type="hidden" name="doctype" value="%(doctype)s" />
                   <input type="hidden" name="categ" value="%(categ)s" />
                   <input type="hidden" name="RN" value="%(rn)s" />
                   <input type="hidden" name="apptype" value="%(apptype)s" />
                   <input type="hidden" name="action" value="%(action)s" />
                   <input type="hidden" name="validate" value="approve" />
                   <input class="adminbutton" type="submit" value="%(approve)s" />
                 </form>
                 <form action="publiline.py">
                   <input type="hidden" name="flow" value="cplx" />
                   <input type="hidden" name="doctype" value="%(doctype)s" />
                   <input type="hidden" name="categ" value="%(categ)s" />
                   <input type="hidden" name="RN" value="%(rn)s" />
                   <input type="hidden" name="apptype" value="%(apptype)s" />
                   <input type="hidden" name="action" value="%(action)s" />
                   <input type="hidden" name="validate" value="reject" />
                   <input class="adminbutton" type="submit" value="%(reject)s" />
                 </form>
               </p>""" % {
                 'rn': RN,
                 'categ': categ,
                 'doctype': doctype,
                 'apptype': apptype,
                 'action': action,
                 'approve': _('Approve'),
                 'reject': _('Reject'),
               }
        return t
    elif (action == "AuthorCancel") and ((apptype == "RRP") or (apptype == "RPB") or (apptype == "RDA")):
        ## --- The author cancels the approval request ---
        if __is_Author (uid, sysno) != 0:
            return _("Action unauthorized for this document.") + "<br />&nbsp;"
        if (status == "cancelled") or (dProjectLeaderAction != None):
            return _("Action unavailable for this document.") + "<br />&nbsp;"
        if validate == "go":
            __db_set_status (key, 'cancelled')
            return __displayCplxDocument(req, doctype, categ, RN, apptype, reply, commentId, ln)
        t = """<p>
                 <form action="publiline.py">
                   <input type="hidden" name="flow" value="cplx" />
                   <input type="hidden" name="doctype" value="%(doctype)s" />
                   <input type="hidden" name="categ" value="%(categ)s" />
                   <input type="hidden" name="RN" value="%(rn)s" />
                   <input type="hidden" name="apptype" value="%(apptype)s" />
                   <input type="hidden" name="action" value="%(action)s" />
                   <input type="hidden" name="validate" value="go" />
                   <input class="adminbutton" type="submit" value="%(cancel)s" />
                 </form>
               </p>""" % {
                 'rn': RN,
                 'categ': categ,
                 'doctype': doctype,
                 'apptype': apptype,
                 'action': action,
                 'cancel': _('Cancel'),
               }
        return t
    else:
        return _("Wrong action for this document.") + "<br />&nbsp;"
    return t
def get_pending_item_details(doctype, reportnumber):
    """Retrieve an item's brief details, preferring the WebSubmit pending
    directory and falling back to the Invenio repository.

    @param doctype: (string) - the doctype of the item for which brief
        details are to be retrieved.
    @param reportnumber: (string) - the report number of the item
        for which details are to be retrieved.
    @return: (dictionary or None) - If details are found for the item,
        they will be returned in a dictionary structured as follows:
           { 'title'         : '-', ## String - the item's title
             'recid'         : '',  ## String - recid taken from the SN file
             'report-number' : '',  ## String - the item's report number
             'authors'       : [],  ## List   - the item's authors
           }
        If no details were found a NoneType is returned.
    """
    ## Pending submissions take precedence over the repository:
    details = get_brief_doc_details_from_pending(doctype, reportnumber)
    if details is not None:
        return details
    ## Nothing pending; try the repository itself:
    return get_brief_doc_details_from_repository(reportnumber)
def get_brief_doc_details_from_pending(doctype, reportnumber):
    """Try to get some brief details about the submission that is awaiting
    the referee's decision.
    Details sought are:
     + title
     + Authors
     + recid (why?)
     + report-number (why?)
    This function searches for a MARC XML record in the pending submission's
    working directory. It prefers the so-called 'dummy' record, but will
    search for the final MARC XML record that would usually be passed to
    bibupload (i.e. recmysql) if that is not present. If neither of these
    records are present, no details will be found.
    @param doctype: (string) - the WebSubmit document type of the item
     to be refereed. It is used in order to locate the submission's
     working directory in the WebSubmit pending directory.
    @param reportnumber: (string) - the report number of the item for
     which details are to be recovered. It is used in order to locate the
     submission's working directory in the WebSubmit pending directory.
    @return: (dictionary or None) - If details are found for the item,
     they will be returned in a dictionary structured as follows:
        { 'title'         : '-', ## String - the item's title
          'recid'         : '',  ## String - recid taken from the SN file
          'report-number' : '',  ## String - the item's report number
          'authors'       : [],  ## List   - the item's authors
        }
     If no details were found (i.e. no MARC XML files in the submission's
     working directory), a NoneType is returned.
    """
    pending_doc_details = None
    marcxml_rec_name = None
    ## Check for a MARC XML record in the pending dir.
    ## If it's there, we will use it to obtain certain bibliographic
    ## information such as title, author(s), etc, which we will then
    ## display to the referee.
    ## We favour the "dummy" record (created with the WebSubmit function
    ## "Make_Dummy_MARC_XML_Record"), because it was made for this
    ## purpose. If it's not there though, we'll take the normal
    ## (final) recmysql record that would generally be passed to bibupload.
    if os.access("%s/%s/%s/%s" % (CFG_WEBSUBMIT_PENDING_DIR, \
                                  doctype, \
                                  reportnumber, \
                                  CFG_WEBSUBMIT_DUMMY_MARC_XML_REC), \
                 os.F_OK|os.R_OK):
        ## Found the "dummy" marc xml record in the submission dir.
        ## Use it:
        marcxml_rec_name = CFG_WEBSUBMIT_DUMMY_MARC_XML_REC
    elif os.access("%s/%s/%s/%s" % (CFG_WEBSUBMIT_PENDING_DIR, \
                                    doctype, \
                                    reportnumber, \
                                    CFG_WEBSUBMIT_MARC_XML_REC), \
                   os.F_OK|os.R_OK):
        ## Although we didn't find the "dummy" marc xml record in the
        ## submission dir, we did find the "real" one (that which would
        ## normally be passed to bibupload). Use it:
        marcxml_rec_name = CFG_WEBSUBMIT_MARC_XML_REC
    ## If we have a MARC XML record in the pending submission's
    ## working directory, go ahead and use it:
    if marcxml_rec_name is not None:
        try:
            fh_marcxml_record = open("%s/%s/%s/%s" \
                                     % (CFG_WEBSUBMIT_PENDING_DIR, \
                                        doctype, \
                                        reportnumber, \
                                        marcxml_rec_name), "r")
            xmltext = fh_marcxml_record.read()
            fh_marcxml_record.close()
        except IOError:
            ## Unfortunately, it wasn't possible to read the details of the
            ## MARC XML record. Register the exception.
            exception_prefix = "Error: Publiline was unable to read the " \
                               "MARC XML record [%s/%s/%s/%s] when trying to " \
                               "use it to recover details about a pending " \
                               "submission." % (CFG_WEBSUBMIT_PENDING_DIR, \
                                                doctype, \
                                                reportnumber, \
                                                marcxml_rec_name)
            register_exception(prefix=exception_prefix)
        else:
            ## Attempt to use bibrecord to create an internal representation
            ## of the record, from which we can extract certain bibliographic
            ## information:
            records = create_records(xmltext, 1, 1)
            try:
                record = records[0][0]
                if record is None:
                    raise ValueError
            except (IndexError, ValueError):
                ## Bibrecord couldn't successfully represent the record
                ## contained in the xmltext string. The record must have
                ## been empty or badly formed (or something).
                pass
            else:
                ## Dictionary to hold the interesting details of the
                ## pending item:
                pending_doc_details = { 'title': '-',
                                        'recid': '',
                                        'report-number': '',
                                        'authors': [],
                                      }
                ## Get the recid:
                ## Note - the old "getInPending" function reads the "SN"
                ## file from the submission's working directory and since
                ## the "SN" file is currently "magic" and hardcoded
                ## throughout WebSubmit, I'm going to stick to this model.
                ## I could, however, have tried to get it from the MARC XML
                ## record as so:
                ## recid = record_get_field_value(rec=record, tag="001")
                try:
                    fh_recid = open("%s/%s/%s/SN" \
                                    % (CFG_WEBSUBMIT_PENDING_DIR, \
                                       doctype, \
                                       reportnumber), "r")
                    recid = fh_recid.read()
                    fh_recid.close()
                except IOError:
                    ## Probably, there was no "SN" file in the submission's
                    ## working directory.
                    pending_doc_details['recid'] = ""
                else:
                    pending_doc_details['recid'] = recid.strip()
                ## Item report number (from record):
                ## Note: I don't know what purpose this serves. It appears
                ## to be used in the email that is sent to the author, but
                ## it seems funny to me, since we already have the report
                ## number (which is indeed used to find the submission's
                ## working directory in pending). Perhaps it's used for
                ## cases when the reportnumber is changed after approval?
                ## To investigate when time allows:
                finalrn = record_get_field_value(rec=record, \
                                                 tag="037", \
                                                 code="a")
                if finalrn != "":
                    pending_doc_details['report-number'] = finalrn
                ## Item title:
                title = record_get_field_value(rec=record, \
                                               tag="245", \
                                               code="a")
                if title != "":
                    pending_doc_details['title'] = title
                else:
                    ## Alternative title (MARC 246, first indicator "1"):
                    alt_title = record_get_field_value(rec=record, \
                                                       tag="246", \
                                                       ind1="1", \
                                                       code="a")
                    if alt_title != "":
                        pending_doc_details['title'] = alt_title
                ## Item first author (MARC 100__a):
                first_author = record_get_field_value(rec=record, \
                                                      tag="100", \
                                                      code="a")
                if first_author != "":
                    pending_doc_details['authors'].append(first_author)
                ## Other Authors (MARC 700__a):
                other_authors = record_get_field_values(rec=record, \
                                                        tag="700", \
                                                        code="a")
                for author in other_authors:
                    pending_doc_details['authors'].append(author)
    ## Return the details discovered about the pending document:
    return pending_doc_details
def get_brief_doc_details_from_repository(reportnumber):
    """Recover brief details about a submission awaiting a referee's
       decision by searching the Invenio repository for its report number.

       The details sought are title, authors, recid, report-number and
       the submitter's email.

       @param reportnumber: (string) - report number used as the search
        pattern.
       @return: (dictionary or None) - a dictionary of the form:
         { 'title'         : '-', ## String - the item's title
           'recid'         : '',  ## String - the record id
           'report-number' : '',  ## String - the item's report number
           'authors'       : [],  ## List   - the item's authors
           'email'         : '',  ## String - the submitter's email
         }
        or None when no (or more than one) matching record exists.
    """
    ## Look the report number up in the repository:
    matching_recids = list(search_pattern(req=None, \
                                          p=reportnumber, \
                                          f="reportnumber", \
                                          m="e"))
    if len(matching_recids) != 1:
        ## Either no record was found, or the report number was ambiguous
        ## (several hits).  The old "getInAlice" function simply took the
        ## first match in the ambiguous case, which was not very nice;
        ## here we give up and return None.
        ## FIXME: a warning or error should be raised for the multi-match
        ## case.
        return None
    recid = matching_recids[0]
    details = {'title': '-',
               'recid': '',
               'report-number': '',
               'authors': [],
               'email': '',
               }
    ## Authors: first author (100) followed by the other authors (700):
    for author_tag in ("100__a", "700__a"):
        for author in get_fieldvalues(recid, author_tag):
            details['authors'].append(author)
    ## Title - fall back to the alternative title when 245 is empty:
    main_titles = get_fieldvalues(recid, "245__a")
    if len(main_titles) > 0:
        details['title'] = main_titles[0]
    else:
        alt_titles = get_fieldvalues(recid, "2641_a")
        if len(alt_titles) > 0:
            details['title'] = alt_titles[0]
    ## Record ID:
    details['recid'] = recid
    ## Report Number:
    report_numbers = get_fieldvalues(recid, "037__a")
    if len(report_numbers) > 0:
        details['report-number'] = report_numbers[0]
    ## Submitter's email:
    emails = get_fieldvalues(recid, "859__f")
    if len(emails) > 0:
        details['email'] = emails[0]
    return details
# Retrieve info about document
def getInfo(RN):
    """
    Retrieve basic info from record with given report number.
    Returns (authors, title, sysno); each element is None when the
    report number does not match exactly one record.
    """
    authors = title = sysno = None
    matches = search_pattern(p=RN, f='037__a')
    ## Only proceed when the report number identifies a unique record:
    if len(matches) == 1:
        sysno = int(matches.tolist()[0])
        all_authors = get_fieldvalues(sysno, "100__a") + get_fieldvalues(sysno, "700__a")
        authors = ','.join(all_authors)
        title = ','.join(get_fieldvalues(sysno, "245__a"))
    return (authors, title, sysno)
#seek info in pending directory
def getInPending(doctype, categ, RN):
    """FIXME: DEPRECATED!

    Read the AU/TI/SN (and fallback TIF) metadata files from the
    submission's pending directory.  Returns (authors, title, sysno, "")
    or 0 when no title could be found.
    """
    pendir = "%s/pending" % CFG_WEBSUBMIT_STORAGEDIR

    def read_metadata_file(name):
        ## Return the content of one metadata file from the submission's
        ## pending directory, or "" when the file does not exist.
        path = "%s/%s/%s/%s" % (pendir, doctype, RN, name)
        if not os.path.exists(path):
            return ""
        fp = open(path, "r")
        content = fp.read()
        fp.close()
        return content

    authors = read_metadata_file("AU")
    title = read_metadata_file("TI")
    sysno = read_metadata_file("SN")
    if title == "":
        ## No "TI" file - fall back to the "TIF" file for the title:
        title = read_metadata_file("TIF")
    if title == "":
        return 0
    return (authors, title, sysno, "")
#seek info in Alice database
def getInAlice(doctype, categ, RN):
    """FIXME: DEPRECATED!

    Look RN up in the repository and return (authors, title, sysno,
    newrn), or 0 when no record was found.
    """
    hits = list(search_pattern(req=None, p=RN, f="reportnumber"))
    if len(hits) == 0:
        ## Nothing matched the report number:
        return 0
    sysno = hits[0]
    if sysno == "":
        return 0
    title = Get_Field('245__a', sysno)
    ## NOTE: emailvalue is fetched but never used - kept for any side
    ## effects of Get_Field; presumably leftover code.
    emailvalue = Get_Field('8560_f', sysno)
    authors = Get_Field('100__a', sysno)
    authors += "\n%s" % Get_Field('700__a', sysno)
    newrn = Get_Field('037__a', sysno)
    return (authors, title, sysno, newrn)
def SendEnglish(doctype, categ, RN, title, authors, access, sysno):
    """Send the (English) approval-request e-mail for a submitted document
    to its referees and any extra configured addresses.

    @param doctype: (string) - the submission doctype.
    @param categ: (string) - the category; NOTE: it is immediately
        overwritten below with the category parsed out of RN.
    @param RN: (string) - the document's report number.
    @param title: (string) - the document's title.
    @param authors: (string) - the document's authors ("-" used if empty).
    @param access: (string) - the submission access number (used to build
        the approval URL).
    @param sysno: (string/int) - the record id (used to build the files URL).
    @return: "" on success; 0 when no recipient address could be found
        (in which case SendWarning alerts the admin instead).
    """
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    # Retrieve the report-number format from the WebSubmit configuration
    # and turn its <CATEG> placeholder into a capture group, so the real
    # category can be parsed out of the report number itself:
    res = run_sql("select value from sbmPARAMETERS where name='categformatDAM' and doctype=%s", (doctype,))
    categformat = res[0][0]
    categformat = re.sub("<CATEG>", "([^-]*)", categformat)
    categs = re.match(categformat, RN)
    if categs is not None:
        categ = categs.group(1)
    else:
        categ = "unknown"
    # Extra (non-referee) recipient addresses, with <CATEG> substituted:
    res = run_sql("select value from sbmPARAMETERS where name='addressesDAM' and doctype=%s",(doctype,))
    if len(res) > 0:
        otheraddresses = res[0][0]
        otheraddresses = otheraddresses.replace("<CATEG>", categ)
    else:
        otheraddresses = ""
    # Build referee's email address
    refereeaddress = ""
    # Try to retrieve the referee's email from the referee's database
    # (category-specific referees first):
    for user in acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype, categ))):
        refereeaddress += user[1] + ","
    # And if there are general referees
    for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
        refereeaddress += user[1] + ","
    # Strip the trailing comma left by the loops above:
    refereeaddress = re.sub(",$", "", refereeaddress)
    # Creation of the mail for the referee
    addresses = ""
    if refereeaddress != "":
        addresses = refereeaddress + ","
    if otheraddresses != "":
        addresses += otheraddresses
    else:
        # No extra addresses: drop the trailing comma again.
        addresses = re.sub(",$", "", addresses)
    if addresses == "":
        # Nobody to notify - alert the admin instead.
        SendWarning(doctype, categ, RN, title, authors, access)
        return 0
    if authors == "":
        authors = "-"
    # NOTE(review): 'directory' is fetched but never used afterwards -
    # presumably leftover code; confirm before removing.
    res = run_sql("select value from sbmPARAMETERS where name='directory' and doctype=%s", (doctype,))
    directory = res[0][0]
    message = """
The document %s has been published as a Communication.
Your approval is requested for it to become an official Note.

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s/files/>

To approve/reject the document, you should go to this URL:
<%s/approve.py?access=%s>

---------------------------------------------
Best regards.
The submission team.""" % (RN, title, authors, CFG_SITE_URL, CFG_SITE_RECORD, sysno, CFG_SITE_URL, access)
    # send the mail
    send_email(FROMADDR, addresses,"Request for Approval of %s" % RN, message, footer="")
    return ""
def send_approval(doctype, categ, rn, title, authors, access, sysno):
    """Send an approval-request e-mail for a submitted document to the
    referees authorized for its category.

    @param doctype: (string) - the submission doctype ("nodoctype" if empty).
    @param categ: (string) - the category ("nocategory" if empty).
    @param rn: (string) - the document's report number.
    @param title: (string) - the document's title.
    @param authors: (string) - the document's authors ("-" used if empty).
    @param access: (string) - the submission access number (only passed on
        to SendWarning when no referee address exists).
    @param sysno: (string/int) - the record id, used to build the files URL.
    @return: the result of send_email, or of SendWarning when no referee
        address could be found.
    """
    fromaddr = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    if not categ:
        categ = "nocategory"
    if not doctype:
        doctype = "nodoctype"
    addresses = acc_get_authorized_emails('referee', categ=categ, doctype=doctype)
    if not addresses:
        # Nobody to notify - alert the admin instead.
        return SendWarning(doctype, categ, rn, title, authors, access)
    if not authors:
        authors = "-"
    # Fix: use CFG_SITE_RECORD instead of a hardcoded "record" path
    # component, consistently with SendEnglish and the other mail
    # helpers in this module (the record URL part is configurable).
    message = """
The document %s has been published as a Communication.
Your approval is requested for it to become an official Note.

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s/files/>

As a referee for this document, you may approve or reject it
from the submission interface:
<%s/submit?doctype=%s>

---------------------------------------------
Best regards.
The submission team.""" % (rn, title, authors, CFG_SITE_URL, CFG_SITE_RECORD, sysno, CFG_SITE_URL, doctype)
    # send the mail
    return send_email(fromaddr, ', '.join(addresses), "Request for Approval of %s" % rn, message, footer="")
def SendWarning(doctype, categ, RN, title, authors, access):
    """Alert the site administrator that the approval-request e-mail for
    document RN could not be sent (no recipient address was found).

    Always returns the empty string.
    """
    sender = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    body = "Failed sending approval email request for %s" % RN
    # send the mail
    send_email(sender, CFG_SITE_ADMIN_EMAIL, "Failed sending approval email request", body)
    return ""
def sendMailToReferee(doctype, categ, RN, email, authors):
    """E-mail a referee asking for a recommendation on document RN
    (complex approval "RRP" workflow).

    @param doctype: (string) - the submission doctype (used in the URLs).
    @param categ: (string) - the requested subcategory.
    @param RN: (string) - the document's report number.
    @param email: (string) - the referee's e-mail address.
    @param authors: (string) - the document's authors.
    @return: "" always.
    """
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'            : '-', ## String - the item's title
    ##     'recid'            : '',  ## String - recid
    ##     'report-number'    : '',  ## String - the item's report number
    ##     'authors'          : [],  ## List   - the item's authors
    ##   }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    ## Fixes: the file-location URL used CFG_SITE_URL twice instead of
    ## CFG_SITE_URL/CFG_SITE_RECORD/recid (cf. sendMailToGroup and
    ## sendMailToProjectLeader); "reccommendation" typo corrected.
    message = """
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your recommendation is requested on it.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

To make a recommendation, you should go to this URL:
<%s>

You can also check the status of the document:
<%s>

---------------------------------------------
Best regards.
The submission team.""" % (str(RN),
                           str(categ),
                           str(item_details['title']),
                           authors,
                           CFG_SITE_URL,
                           CFG_SITE_RECORD,
                           str(item_details['recid']),
                           str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype="+doctype+"&ln=en&apptype=RRP&categ="+categ+"&RN="+RN+"&action=RefereeRecom"),
                           str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype="+doctype+"&ln=en&apptype=RRP&categ="+categ+"&RN="+RN))
    # send the mail
    send_email(FROMADDR, email,"Request for document %s recommendation" % (RN), message)
    return ""
def sendMailToGroup(doctype, categ, RN, group_id, authors):
    """E-mail every member of a user group asking for comments on
    document RN (complex approval "RRP" workflow).

    The placeholder address "info@invenio-software.org" is skipped.

    @param doctype: (string) - the submission doctype (used in the URL).
    @param categ: (string) - the requested subcategory.
    @param RN: (string) - the document's report number.
    @param group_id: (int/string) - id of the usergroup whose members are
        notified.
    @param authors: (string) - the document's authors.
    @return: "" always.
    """
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'            : '-', ## String - the item's title
    ##     'recid'            : '',  ## String - recid
    ##     'report-number'    : '',  ## String - the item's report number
    ##     'authors'          : [],  ## List   - the item's authors
    ##   }
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your comments are requested on this document.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

To leave a comment or check the status of the approval process, you should go to this URL:
<%s>
""" % (str(RN),
       str(categ),
       str(item_details['title']),
       authors,
       CFG_SITE_URL,
       CFG_SITE_RECORD,
       str(item_details['recid']),
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype="+doctype+"&ln=en&apptype=RRP&categ="+categ+"&RN="+RN))
    # send mails to all members of the group.
    # Fix: use parameterized queries (as done elsewhere in this module,
    # e.g. the sbmPARAMETERS lookups) instead of string interpolation,
    # which is SQL-injection prone:
    group_member_ids = run_sql("SELECT id_user FROM user_usergroup WHERE id_usergroup=%s", (group_id,))
    for member_id in group_member_ids:
        member_email = run_sql("SELECT email FROM user WHERE id=%s", (member_id[0],))
        if not member_email[0][0] == "info@invenio-software.org":
            send_email(FROMADDR, member_email[0][0],"Request for comment on document %s" % (RN), message)
    return ""
def sendMailToProjectLeader(doctype, categ, RN, email, authors, actor, recommendation):
    """E-mail the project leader asking for the final approval/rejection
    decision on document RN, quoting the recommendation just made by
    another actor (referee or publication committee chair).

    @param doctype: (string) - the submission doctype (used in the URLs).
    @param categ: (string) - the requested subcategory.
    @param RN: (string) - the document's report number.
    @param email: (string) - the project leader's e-mail address.
    @param authors: (string) - the document's authors.
    @param actor: (string) - role of whoever made the recommendation.
    @param recommendation: (string) - the recommendation text to quote.
    @return: "" always.
    """
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'            : '-', ## String - the item's title
    ##     'recid'            : '',  ## String - recid
    ##     'report-number'    : '',  ## String - the item's report number
    ##     'authors'          : [],  ## List   - the item's authors
    ##   }
    ## NOTE(review): if no (unique) record was found, item_details is None
    ## and the subscriptions below raise TypeError - confirm callers
    ## guarantee the record exists.
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
Scientific Note approval for document %s has been submitted to the CERN Document Server.
Your approval is requested for this document. Once you have received recommendations from both the referee and the publication committee chair, you will be able to make your decision.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

The %s has made a recommendation for the document. He/she said the following:

%s

You can approve this document by visiting this page:
<%s>

You can also check the status of the document from:
<%s>
""" % (str(RN),
       str(categ),
       str(item_details['title']),
       authors,
       CFG_SITE_URL,
       CFG_SITE_RECORD,
       str(item_details['recid']),
       actor,
       recommendation,
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype="+doctype+"&ln=en&apptype=RRP&categ="+categ+"&RN="+RN+"&action=ProjectLeaderDecision"),
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype="+doctype+"&ln=en&apptype=RRP&categ="+categ+"&RN="+RN))
    # send the mail to the project leader
    send_email(FROMADDR, email,"Request for approval/rejection of document %s" % (RN), message)
    return ""
def sendMailToSubmitter(doctype, categ, RN, outcome):
    """E-mail the submitter of document RN with the final outcome of the
    approval process.

    @param doctype: (string) - the submission doctype (unused here, kept
        for interface compatibility with the sibling mail helpers).
    @param categ: (string) - the requested subcategory.
    @param RN: (string) - the document's report number.
    @param outcome: (string) - the project leader's final recommendation.
    @return: "" always.
    """
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'            : '-', ## String - the item's title
    ##     'recid'            : '',  ## String - recid
    ##     'report-number'    : '',  ## String - the item's report number
    ##     'authors'          : [],  ## List   - the item's authors
    ##     'email'            : '',  ## String - the submitter's email
    ##   }
    ## Robustness fix: previously a None result (no/ambiguous record)
    ## crashed with TypeError, and an empty 'email' value triggered a
    ## mail to no address.  In either case there is nobody to notify:
    if item_details is None or not item_details.get('email'):
        return ""
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    message = """
The approval process for your document : %s, has been completed. The details of this document are as follows:

Requested subcategory: %s
Title: %s

The project leader has made the following recommendation for the document:

%s
""" % (RN, categ, item_details['title'], outcome)
    # send the mail to the submitter
    send_email(FROMADDR, item_details['email'],"Final outcome for approval of document : %s" % (RN), message)
    return ""
def sendMailtoCommitteeChair(doctype, categ, RN, email, authors):
    """E-mail the publication committee chair asking for a recommendation
    on document RN, after the referee has made theirs.

    @param doctype: (string) - the submission doctype (used in the URL).
    @param categ: (string) - the requested subcategory.
    @param RN: (string) - the document's report number.
    @param email: (string) - the committee chair's e-mail address.
    @param authors: (string) - the document's authors.
    @return: "" always.
    """
    item_details = get_brief_doc_details_from_repository(RN)
    ## get_brief_doc_details_from_repository has returned either None
    ## or a dictionary with the following structure:
    ##   { 'title'            : '-', ## String - the item's title
    ##     'recid'            : '',  ## String - recid
    ##     'report-number'    : '',  ## String - the item's report number
    ##     'authors'          : [],  ## List   - the item's authors
    ##   }
    ## NOTE(review): a None result makes the subscriptions below raise
    ## TypeError - confirm callers guarantee the record exists.
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME, CFG_SITE_SUPPORT_EMAIL)
    ## NOTE: "referree"/"reccommendation" misspellings below are part of
    ## the outgoing message text and are left untouched here.
    message = """
The referree assigned to the document detailed below has made a reccommendation. You are now requested to make a reccommendation of your own.

Requested subcategory: %s

Title: %s

Author(s): %s

To access the document(s), select the file(s) from the location:
<%s/%s/%s>

You can make a reccommendation by visiting this page:
<%s>
""" % (str(categ),
       str(item_details['title']),
       authors,
       CFG_SITE_URL,
       CFG_SITE_RECORD,
       str(item_details['recid']),
       str(CFG_SITE_URL + "/publiline.py?flow=cplx&doctype="+doctype+"&ln=en&apptype=RRP&categ="+categ+"&RN="+RN))
    # send the mail to the committee chair
    send_email(FROMADDR, email,"Request for reccommendation of document %s" % (RN), message)
diff --git a/invenio/legacy/websubmit/webinterface.py b/invenio/legacy/websubmit/webinterface.py
index b1b9906ee..a6255c16f 100644
--- a/invenio/legacy/websubmit/webinterface.py
+++ b/invenio/legacy/websubmit/webinterface.py
@@ -1,929 +1,929 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__lastupdated__ = """$Date$"""
__revision__ = "$Id$"
import os
import errno
import time
import cgi
import sys
import shutil
from urllib import urlencode
from invenio.config import \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_SITE_LANG, \
CFG_SITE_NAME, \
CFG_SITE_URL, \
CFG_SITE_SECURE_URL, \
CFG_WEBSUBMIT_STORAGEDIR, \
CFG_PREFIX, \
CFG_CERN_SITE
from invenio.utils import apache
from invenio.dbquery import run_sql
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_admin import acc_is_role
from invenio.webpage import warning_page
from invenio.webuser import getUid, page_not_authorized, collect_user_info, \
isGuestUser
from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory
from invenio.utils.url import make_canonical_urlargd, redirect_to_url
from invenio.base.i18n import gettext_set_language
from invenio.bibdocfile import stream_file, \
decompose_file, propose_next_docname
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.htmlutils import is_html_text_editor_installed
from invenio.websubmit_icon_creator import create_icon, InvenioWebSubmitIconCreatorError
from invenio.ckeditor_invenio_connector import process_CKEditor_upload, send_response
import invenio.template
websubmit_templates = invenio.template.load('websubmit')
from invenio.websearchadminlib import get_detailed_page_tabs
from invenio.utils.json import json, CFG_JSON_AVAILABLE
import invenio.template
from flask import session
webstyle_templates = invenio.template.load('webstyle')
websearch_templates = invenio.template.load('websearch')
from invenio.legacy.websubmit.engine import home, action, interface, endaction, makeCataloguesTable
class WebInterfaceSubmitPages(WebInterfaceDirectory):
_exports = ['summary', 'sub', 'direct', '', 'attachfile', 'uploadfile', \
'getuploadedfile', 'upload_video', ('continue', 'continue_')]
def uploadfile(self, req, form):
    """
    Similar to /submit, but only consider files. Nice for
    asynchronous Javascript uploads. Should be used to upload a
    single file.

    Also try to create an icon, and return URL to file(s) + icon(s)

    Authentication is performed based on session ID passed as
    parameter instead of cookie-based authentication, due to the
    use of this URL by the Flash plugin (to upload multiple files
    at once), which does not route cookies.

    @param req: the request object.
    @param form: the posted form; file fields are expected to carry a
        'filename' attribute and a 'file' handle.
    @return: a JSON string mapping each uploaded form key to
        {'name': ..., 'iconName': ...} ('iconName' present only when an
        icon could be created), when JSON support is available.
    @raise apache.SERVER_RETURN: HTTP_BAD_REQUEST when mandatory fields
        or the file name are missing; HTTP_UNAUTHORIZED when the user may
        not submit; HTTP_FORBIDDEN on path escape or filesystem errors.

    FIXME: consider adding /deletefile and /modifyfile functions +
    parsing of additional parameters to rename files, add
    comments, restrictions, etc.
    """
    argd = wash_urlargd(form, {
        'doctype': (str, ''),
        'access': (str, ''),
        'indir': (str, ''),
        'session_id': (str, ''),
        'rename': (str, ''),
        })
    curdir = None
    # All three path components are mandatory to locate the submission's
    # working directory:
    if not form.has_key("indir") or \
           not form.has_key("doctype") or \
           not form.has_key("access"):
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    else:
        curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                              argd['indir'],
                              argd['doctype'],
                              argd['access'])
    user_info = collect_user_info(req)
    if form.has_key("session_id"):
        # Are we uploading using Flash, which does not transmit
        # cookie? Then we expect to receive session_id as a form
        # parameter.  First check that IP addresses do not
        # mismatch.
        # NOTE(review): 'uid' is only bound inside this branch; when no
        # session_id is posted, the acc_authorize_action() call below
        # appears to use an unbound name - confirm against callers.
        uid = session.uid
        user_info = collect_user_info(uid)
    try:
        # The 'act' file in the working directory records which WebSubmit
        # action this submission performs.
        act_fd = file(os.path.join(curdir, 'act'))
        action = act_fd.read()
        act_fd.close()
    except:
        action = ""
    # Is user authorized to perform this action?
    (auth_code, auth_message) = acc_authorize_action(uid, "submit",
                                                     authorized_if_no_roles=not isGuestUser(uid),
                                                     verbose=0,
                                                     doctype=argd['doctype'],
                                                     act=action)
    if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
        # User cannot submit
        raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
    else:
        # Process the upload and get the response
        added_files = {}
        for key, formfields in form.items():
            filename = key.replace("[]", "")
            file_to_open = os.path.join(curdir, filename)
            if hasattr(formfields, "filename") and formfields.filename:
                # Destination directory, private to this user and form key:
                dir_to_open = os.path.abspath(os.path.join(curdir,
                                                           'files',
                                                           str(user_info['uid']),
                                                           key))
                try:
                    # Guard against path escapes out of the storage area:
                    assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                except AssertionError:
                    register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key))
                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                if not os.path.exists(dir_to_open):
                    try:
                        os.makedirs(dir_to_open)
                    except OSError, e:
                        if e.errno != errno.EEXIST:
                            # If the issue is only that directory
                            # already exists, then continue, else
                            # report
                            register_exception(req=req, alert_admin=True)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                filename = formfields.filename
                ## Before saving the file to disc, wash the filename (in particular
                ## washing away UNIX and Windows (e.g. DFS) paths):
                filename = os.path.basename(filename.split('\\')[-1])
                filename = filename.strip()
                if filename != "":
                    # Check that file does not already exist
                    # NOTE(review): 'n' is never used - looks like leftover code.
                    n = 1
                    while os.path.exists(os.path.join(dir_to_open, filename)):
                        #dirname, basename, extension = decompose_file(new_destination_path)
                        basedir, name, extension = decompose_file(filename)
                        new_name = propose_next_docname(name)
                        filename = new_name + extension
                    # This may be dangerous if the file size is bigger than the available memory
                    fp = open(os.path.join(dir_to_open, filename), "w")
                    fp.write(formfields.file.read())
                    fp.close()
                    # Remember the name of the last uploaded file...
                    fp = open(os.path.join(curdir, "lastuploadedfile"), "w")
                    fp.write(filename)
                    fp.close()
                    # ...and record the washed file name under the form key:
                    fp = open(file_to_open, "w")
                    fp.write(filename)
                    fp.close()
                    try:
                        # Create icon
                        (icon_path, icon_name) = create_icon(
                            { 'input-file'           : os.path.join(dir_to_open, filename),
                              'icon-name'            : filename, # extension stripped automatically
                              'icon-file-format'     : 'gif',
                              'multipage-icon'       : False,
                              'multipage-icon-delay' : 100,
                              'icon-scale'           : "300>", # Resize only if width > 300
                              'verbosity'            : 0,
                              })
                        icons_dir = os.path.join(os.path.join(curdir,
                                                              'icons',
                                                              str(user_info['uid']),
                                                              key))
                        if not os.path.exists(icons_dir):
                            # Create uid/icons dir if needed
                            try:
                                os.makedirs(icons_dir)
                            except OSError, e:
                                if e.errno != errno.EEXIST:
                                    # If the issue is only that
                                    # directory already exists,
                                    # then continue, else report
                                    register_exception(req=req, alert_admin=True)
                                    raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                        os.rename(os.path.join(icon_path, icon_name),
                                  os.path.join(icons_dir, icon_name))
                        added_files[key] = {'name': filename,
                                            'iconName': icon_name}
                    except InvenioWebSubmitIconCreatorError, e:
                        # We could not create the icon
                        added_files[key] = {'name': filename}
                        continue
                else:
                    # Washed file name came out empty: refuse the upload.
                    raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        # Send our response
        if CFG_JSON_AVAILABLE:
            return json.dumps(added_files)
def upload_video(self, req, form):
"""
A clone of uploadfile but for (large) videos.
Does not copy the uploaded file to the websubmit directory.
Instead, the path to the file is stored inside the submission directory.
"""
def gcd(a, b):
""" the euclidean algorithm """
while a:
a, b = b % a, a
return b
from invenio.modules.encoder.extract import extract_frames
from invenio.modules.encoder.config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
from invenio.modules.encoder.encode import determine_aspect
from invenio.modules.encoder.utils import probe
from invenio.modules.encoder.metadata import ffprobe_metadata
from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX
argd = wash_urlargd(form, {
'doctype': (str, ''),
'access': (str, ''),
'indir': (str, ''),
'session_id': (str, ''),
'rename': (str, ''),
})
curdir = None
if not form.has_key("indir") or \
not form.has_key("doctype") or \
not form.has_key("access"):
raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
else:
curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
argd['indir'],
argd['doctype'],
argd['access'])
user_info = collect_user_info(req)
if form.has_key("session_id"):
# Are we uploading using Flash, which does not transmit
# cookie? The expect to receive session_id as a form
# parameter. First check that IP addresses do not
# mismatch.
uid = session.uid
user_info = collect_user_info(uid)
try:
act_fd = file(os.path.join(curdir, 'act'))
action = act_fd.read()
act_fd.close()
except:
act = ""
# Is user authorized to perform this action?
(auth_code, auth_message) = acc_authorize_action(uid, "submit",
authorized_if_no_roles=not isGuestUser(uid),
verbose=0,
doctype=argd['doctype'],
act=action)
if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
# User cannot submit
raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
else:
# Process the upload and get the response
json_response = {}
for key, formfields in form.items():
filename = key.replace("[]", "")
if hasattr(formfields, "filename") and formfields.filename:
dir_to_open = os.path.abspath(os.path.join(curdir,
'files',
str(user_info['uid']),
key))
try:
assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
except AssertionError:
register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key))
raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
if not os.path.exists(dir_to_open):
try:
os.makedirs(dir_to_open)
except OSError, e:
if e.errno != errno.EEXIST:
# If the issue is only that directory
# already exists, then continue, else
# report
register_exception(req=req, alert_admin=True)
raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
filename = formfields.filename
## Before saving the file to disc, wash the filename (in particular
## washing away UNIX and Windows (e.g. DFS) paths):
filename = os.path.basename(filename.split('\\')[-1])
filename = filename.strip()
if filename != "":
# Check that file does not already exist
while os.path.exists(os.path.join(dir_to_open, filename)):
#dirname, basename, extension = decompose_file(new_destination_path)
basedir, name, extension = decompose_file(filename)
new_name = propose_next_docname(name)
filename = new_name + extension
#-------------#
# VIDEO STUFF #
#-------------#
## Remove all previous uploads
filelist = os.listdir(os.path.split(formfields.file.name)[0])
for afile in filelist:
if argd['access'] in afile:
os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile))
## Check if the file is a readable video
## We must exclude all image and audio formats that are readable by ffprobe
if (os.path.splitext(filename)[1] in ['jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga',
'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm',
'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf', 'jpe', 'jif',
'jfif', 'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd',
'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac', 'wma', 'm4a', 'wv', 'oga',
'm4a', 'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd']
or not probe(formfields.file.name)):
formfields.file.close()
raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
## We have no "delete" attribute in Python 2.4
if sys.hexversion < 0x2050000:
## We need to rename first and create a dummy file
## Rename the temporary file for the garbage collector
new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1]
os.rename(formfields.file.name, new_tmp_fullpath)
dummy = open(formfields.file.name, "w")
dummy.close()
formfields.file.close()
else:
# Mark the NamedTemporatyFile as not to be deleted
formfields.file.delete = False
formfields.file.close()
## Rename the temporary file for the garbage collector
new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1]
os.rename(formfields.file.name, new_tmp_fullpath)
# Write the path to the temp file to a file in STORAGEDIR
fp = open(os.path.join(dir_to_open, "filepath"), "w")
fp.write(new_tmp_fullpath)
fp.close()
fp = open(os.path.join(dir_to_open, "filename"), "w")
fp.write(filename)
fp.close()
## We are going to extract some thumbnails for websubmit ##
sample_dir = os.path.join(curdir, 'files', str(user_info['uid']), CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
try:
## Remove old thumbnails
shutil.rmtree(sample_dir)
except OSError:
register_exception(req=req, alert_admin=False)
try:
os.makedirs(os.path.join(curdir, 'files', str(user_info['uid']), sample_dir))
except OSError:
register_exception(req=req, alert_admin=False)
try:
extract_frames(input_file=new_tmp_fullpath,
output_file=os.path.join(sample_dir, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME),
size="600x600",
numberof=5)
json_response['frames'] = []
for extracted_frame in os.listdir(sample_dir):
json_response['frames'].append(extracted_frame)
except:
## If the frame extraction fails, something was bad with the video
os.remove(new_tmp_fullpath)
register_exception(req=req, alert_admin=False)
raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
## Try to detect the aspect. if this fails, the video is not readable
## or a wrong file might have been uploaded
try:
(aspect, width, height) = determine_aspect(new_tmp_fullpath)
if aspect:
aspx, aspy = aspect.split(':')
else:
the_gcd = gcd(width, height)
aspx = str(width / the_gcd)
aspy = str(height / the_gcd)
json_response['aspx'] = aspx
json_response['aspy'] = aspy
except TypeError:
## If the aspect detection completely fails
os.remove(new_tmp_fullpath)
register_exception(req=req, alert_admin=False)
raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
## Try to extract some metadata from the video container
metadata = ffprobe_metadata(new_tmp_fullpath)
json_response['meta_title'] = metadata['format'].get('TAG:title')
json_response['meta_description'] = metadata['format'].get('TAG:description')
json_response['meta_year'] = metadata['format'].get('TAG:year')
json_response['meta_author'] = metadata['format'].get('TAG:author')
## Empty file name
else:
raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
## We found our file, we can break the loop
break;
# Send our response
if CFG_JSON_AVAILABLE:
dumped_response = json.dumps(json_response)
# store the response in the websubmit directory
# this is needed if the submission is not finished and continued later
response_dir = os.path.join(curdir, 'files', str(user_info['uid']), "response")
try:
os.makedirs(response_dir)
except OSError:
# register_exception(req=req, alert_admin=False)
pass
fp = open(os.path.join(response_dir, "response"), "w")
fp.write(dumped_response)
fp.close()
return dumped_response
def getuploadedfile(self, req, form):
    """
    Stream uploaded files.

    For the moment, restrict to files in ./curdir/files/uid or
    ./curdir/icons/uid directory, so that we are sure we stream
    files only to the user who uploaded them.
    """
    argd = wash_urlargd(form, {'indir': (str, None),
                               'doctype': (str, None),
                               'access': (str, None),
                               'icon': (int, 0),
                               'key': (str, None),
                               'filename': (str, None),
                               'nowait': (int, 0)})
    # Every parameter (except the int flags, which default to 0) is
    # required; refuse the request when any is missing:
    if None in argd.values():
        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
    uid = getUid(req)
    # Icons and plain files live in sibling sub-directories:
    if argd['icon']:
        subdir = 'icons'
    else:
        subdir = 'files'
    requested_path = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                                  argd['indir'],
                                  argd['doctype'],
                                  argd['access'],
                                  subdir,
                                  str(uid),
                                  argd['key'],
                                  argd['filename']
                                  )
    abs_file_path = os.path.abspath(requested_path)
    # Refuse anything that resolves outside the storage area:
    if abs_file_path.startswith(CFG_WEBSUBMIT_STORAGEDIR):
        # Check if file exists.  Note that an icon might not yet have
        # been created, so unless 'nowait' was requested we poll up to
        # five times, sleeping one second after each failed check:
        if argd['nowait']:
            attempts = 1
        else:
            attempts = 5
        for dummy in range(attempts):
            if os.path.exists(abs_file_path):
                return stream_file(req, abs_file_path)
            if not argd['nowait']:
                time.sleep(1)
    # Send error 404 in all other cases
    raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND)
def attachfile(self, req, form):
    """
    Process requests received from CKEditor to upload files.
    If the uploaded file is an image, create an icon version.

    The outcome (message + public URL of the stored file) is sent back
    to CKEditor through send_response().
    """
    if not is_html_text_editor_installed():
        return apache.HTTP_NOT_FOUND
    if not form.has_key('type'):
        form['type'] = 'File'
    if not form.has_key('upload') or \
       not form['type'] in \
       ['File', 'Image', 'Flash', 'Media']:
        #return apache.HTTP_NOT_FOUND
        pass
    filetype = form['type'].lower()
    uid = getUid(req)
    # URL where the file can be fetched after upload
    user_files_path = '%(CFG_SITE_URL)s/submit/getattachedfile/%(uid)s' % \
                      {'uid': uid,
                       'CFG_SITE_URL': CFG_SITE_URL,
                       'filetype': filetype}
    # Path to directory where uploaded files are saved
    user_files_absolute_path = '%(CFG_PREFIX)s/var/tmp/attachfile/%(uid)s/%(filetype)s' % \
                               {'uid': uid,
                                'CFG_PREFIX': CFG_PREFIX,
                                'filetype': filetype}
    try:
        os.makedirs(user_files_absolute_path)
    except OSError:
        # Directory most likely exists already: best-effort creation.
        pass
    user_info = collect_user_info(req)
    (auth_code, auth_message) = acc_authorize_action(user_info, 'attachsubmissionfile')
    msg = ""
    # BUGFIX: callback_function was previously bound only in the
    # authorized branch below, so the final send_response() call raised
    # NameError for guest/unauthorized users.  Initialize it up front.
    callback_function = None
    if user_info['email'] == 'guest':
        # User is guest: must login prior to upload
        msg = 'Please login before uploading file.'
    elif auth_code:
        # User cannot submit
        msg = 'Sorry, you are not allowed to submit files.'
    else:
        # Process the upload and get the response
        (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function) = \
            process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path)
        if uploaded_file_path:
            # Create an icon for uploaded images
            if form.get('type', '') == 'Image':
                try:
                    (icon_path, icon_name) = create_icon(
                        {'input-file': uploaded_file_path,
                         'icon-name': os.path.splitext(uploaded_file_name)[0],
                         'icon-file-format': os.path.splitext(uploaded_file_name)[1][1:] or 'gif',
                         'multipage-icon': False,
                         'multipage-icon-delay': 100,
                         'icon-scale': "700>",  # Resize only if width > 700
                         'verbosity': 0,
                         })
                    # Move original file to /original dir, and replace it with icon file
                    original_user_files_absolute_path = os.path.join(user_files_absolute_path,
                                                                     'original')
                    if not os.path.exists(original_user_files_absolute_path):
                        # Create /original dir if needed
                        os.mkdir(original_user_files_absolute_path)
                    os.rename(uploaded_file_path,
                              original_user_files_absolute_path + os.sep + uploaded_file_name)
                    os.rename(icon_path + os.sep + icon_name,
                              uploaded_file_path)
                except InvenioWebSubmitIconCreatorError:
                    # Icon creation failed: keep the original file as-is.
                    pass
            user_files_path += '/' + filetype + '/' + uploaded_file_name
        else:
            user_files_path = ''
            if not msg:
                msg = 'No valid file found'
    # Send our response
    send_response(req, msg, user_files_path, callback_function)
def _lookup(self, component, path):
    """Dispatch the dynamic attachment URLs, e.g.:

        /submit/getattachedfile/41336978/image/myfigure.png
        /submit/attachfile/41336978/image/myfigure.png

    Returns (handler, remaining_path) or (None, []) when unmatched.
    """
    if component == 'getattachedfile' and len(path) > 2:
        submitter_uid = path[0]    # uid of the submitter
        attachment_type = path[1]  # file, image, flash or media (as
                                   # defined by CKEditor)
        if attachment_type in ['file', 'image', 'flash', 'media']:
            attachment_name = '/'.join(path[2:])  # the filename

            def answer_get(req, form):
                """Accessing files attached to submission."""
                form['file'] = attachment_name
                form['type'] = attachment_type
                form['uid'] = submitter_uid
                return self.getattachedfile(req, form)

            return answer_get, []
    # All other cases: file not found
    return None, []
def getattachedfile(self, req, form):
    """
    Return a file uploaded to the submission 'drop box' by the
    CKEditor.
    """
    argd = wash_urlargd(form, {'file': (str, None),
                               'type': (str, None),
                               'uid': (int, 0)})
    # Can user view this record, i.e. can user access its
    # attachments?
    uid = getUid(req)
    user_info = collect_user_info(req)
    if argd['file'] is not None:
        # Prepare path to file on disk. Normalize the path so that
        # ../ and other dangerous components are removed.
        attachments_root = CFG_PREFIX + '/var/tmp/attachfile/'
        path = os.path.abspath(attachments_root +
                               '/' + str(argd['uid']) +
                               '/' + argd['type'] + '/' + argd['file'])
        # Check that we are really accessing attachements
        # directory, for the declared record.
        if path.startswith(attachments_root) and os.path.exists(path):
            return stream_file(req, path)
    # Send error 404 in all other cases
    return apache.HTTP_NOT_FOUND
def continue_(self, req, form):
    """
    Continue an interrupted submission.
    """
    args = wash_urlargd(form, {'access': (str, ''), 'doctype': (str, '')})
    ln = args['ln']
    _ = gettext_set_language(ln)
    access = args['access']
    doctype = args['doctype']
    if not (access and doctype):
        return warning_page(_("Sorry, invalid arguments"), req=req, ln=ln)
    user_info = collect_user_info(req)
    email = user_info['email']
    res = run_sql("SELECT action, status FROM sbmSUBMISSIONS WHERE id=%s AND email=%s and doctype=%s",
                  (access, email, doctype))
    if res:
        action, status = res[0]
        if status == 'finished':
            return warning_page(_("Note: the requested submission has already been completed"), req=req, ln=ln)
        # Resume the submission through the /submit/direct handler.
        redirect_to_url(req, CFG_SITE_SECURE_URL + '/submit/direct?' +
                        urlencode({'sub': action + doctype,
                                   'access': access}))
    return warning_page(_("Sorry, you don't seem to have initiated a submission with the provided access number"), req=req, ln=ln)
def direct(self, req, form):
    """Directly redirected to an initialized submission."""
    args = wash_urlargd(form, {'sub': (str, ''),
                               'access': (str, '')})
    sub = args['sub']
    access = args['access']
    ln = args['ln']
    _ = gettext_set_language(ln)
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "direct",
                                   navmenuid='submit')
    myQuery = req.args
    if not sub:
        return warning_page(_("Sorry, 'sub' parameter missing..."), req, ln=ln)
    res = run_sql("SELECT docname,actname FROM sbmIMPLEMENT WHERE subname=%s", (sub,))
    if not res:
        return warning_page(_("Sorry. Cannot analyse parameter"), req, ln=ln)
    # get document type and action name
    doctype, action = res[0]
    # retrieve other parameter values
    params = dict(form)
    # find existing access number
    if not access:
        # create 'unique' access number from timestamp + pid
        access = "%i_%s" % (time.time(), os.getpid())
    # retrieve 'dir' value
    res = run_sql("SELECT dir FROM sbmACTION WHERE sactname=%s", (action,))
    indir = res[0][0]
    mainmenu = req.headers_in.get('referer')
    params['access'] = access
    params['act'] = action
    params['doctype'] = doctype
    params['startPg'] = '1'
    params['mainmenu'] = mainmenu
    params['ln'] = ln
    params['indir'] = indir
    redirect_to_url(req, "%s/submit?%s" % (CFG_SITE_SECURE_URL, urlencode(params)))
def sub(self, req, form):
    """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)"""
    args = wash_urlargd(form, {'password': (str, '')})
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../sub/",
                                   navmenuid='submit')
    # Alert the admin that the deprecated handler is still in use,
    # then carry on serving the request.
    try:
        raise DeprecationWarning('submit/sub handler has been used. Please use submit/direct. e.g. "submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"')
    except DeprecationWarning:
        register_exception(req=req, alert_admin=True)
    ln = args['ln']
    _ = gettext_set_language(ln)
    #DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO
    params = dict(form)
    password = args['password']
    if password:
        del params['password']
        # Old-style "access@subname" packing inside the password field.
        if "@" in password:
            params['access'], params['sub'] = password.split('@', 1)
        else:
            params['sub'] = password
    else:
        query_bits = str(req.args).split('@')
        if len(query_bits) <= 1:
            return warning_page(_("Sorry, invalid URL..."), req, ln=ln)
        params = {'sub': query_bits[-1]}
        params.update(cgi.parse_qs('@'.join(query_bits[:-1])))
    url = "%s/submit/direct?%s" % (CFG_SITE_SECURE_URL, urlencode(params, doseq=True))
    redirect_to_url(req, url)
def summary(self, req, form):
    """Render a summary of the field values entered so far for one
    submission (identified by indir/doctype/access)."""
    args = wash_urlargd(form, {
        'doctype': (str, ''),
        'act': (str, ''),
        'access': (str, ''),
        'indir': (str, '')})
    ln = args['ln']
    uid = getUid(req)
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, "../summary",
                                   navmenuid='submit')
    curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, args['indir'], args['doctype'], args['access'])
    try:
        # Reject directory-traversal attempts in indir/doctype/access.
        # NOTE(review): assert is stripped under `python -O`; kept as-is
        # because register_exception() below relies on the active
        # exception context -- confirm -O is never used in production.
        assert(curdir == os.path.abspath(curdir))
    except AssertionError:
        register_exception(req=req, alert_admin=True, prefix='Possible cracking tentative: indir="%s", doctype="%s", access="%s"' % (args['indir'], args['doctype'], args['access']))
        return warning_page("Invalid parameters", req, ln)
    subname = "%s%s" % (args['act'], args['doctype'])
    res = run_sql("select sdesc,fidesc,pagenb,level from sbmFIELD where subname=%s "
                  "order by pagenb,fieldnb", (subname,))
    values = []
    for arr in res:
        if arr[0] != "":
            val = {
                'mandatory': (arr[3] == 'M'),
                'value': '',
                'page': arr[2],
                'name': arr[0],
            }
            # BUGFIX: the existence check used
            # os.path.join(curdir, curdir, arr[1]) -- the duplicated
            # curdir component was only harmless because curdir is
            # absolute.  Test the same path that is opened below.
            if os.path.exists(os.path.join(curdir, arr[1])):
                fd = open(os.path.join(curdir, arr[1]), "r")
                value = fd.read()
                fd.close()
                value = value.replace("\n", " ")
                value = value.replace("Select:", "")
            else:
                value = ""
            val['value'] = value
            values.append(val)
    return websubmit_templates.tmpl_submit_summary(
        ln=args['ln'],
        values=values,
    )
def index(self, req, form):
    """Main /submit handler.

    Depending on which arguments are present, dispatch to:
    the submission catalogue home page (no doctype), the action
    selection page (doctype but no action), the submission interface
    (step 0), or the end-of-action processing (later steps).
    """
    args = wash_urlargd(form, {
        'c': (str, CFG_SITE_NAME),
        'doctype': (str, ''),
        'act': (str, ''),
        'startPg': (str, "1"),
        'access': (str, ''),
        'mainmenu': (str, ''),
        'fromdir': (str, ''),
        'nextPg': (str, ''),
        'nbPg': (str, ''),
        'curpage': (str, '1'),
        'step': (str, '0'),
        'mode': (str, 'U'),
        })
    ## Strip whitespace from beginning and end of doctype and action:
    args["doctype"] = args["doctype"].strip()
    args["act"] = args["act"].strip()

    def _index(req, c, ln, doctype, act, startPg, access,
               mainmenu, fromdir, nextPg, nbPg, curpage, step,
               mode):
        # Inner helper doing the actual dispatch; invoked with **args below.
        auth_args = {}
        if doctype:
            auth_args['doctype'] = doctype
        if act:
            auth_args['act'] = act
        uid = getUid(req)
        if CFG_CERN_SITE:
            ## HACK BEGIN: this is a hack for CMS and ATLAS draft
            # Restrict CMSPUB/ATLPUB submissions to specific CERN groups;
            # guests are redirected to the login page first.
            user_info = collect_user_info(req)
            if doctype == 'CMSPUB' and act == "" and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch':
                if isGuestUser(uid):
                    return redirect_to_url(req, "%s/youraccount/login%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {}))
                        , norobot=True)
                if 'cms-publication-committee-chair [CERN]' not in user_info['group']:
                    return page_not_authorized(req, "../submit", text="In order to access this submission interface you need to be member of the CMS Publication Committee Chair.",
                                               navmenuid='submit')
            elif doctype == 'ATLPUB' and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch':
                if isGuestUser(uid):
                    return redirect_to_url(req, "%s/youraccount/login%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {}))
                        , norobot=True)
                if 'atlas-gen [CERN]' not in user_info['group']:
                    return page_not_authorized(req, "../submit", text="In order to access this submission interface you need to be member of ATLAS.",
                                               navmenuid='submit')
            ## HACK END
        if doctype == "":
            # No document type chosen yet: show the submission catalogue.
            catalogues_text, at_least_one_submission_authorized, submission_exists = makeCataloguesTable(req, ln=CFG_SITE_LANG)
            if not at_least_one_submission_authorized and submission_exists:
                if isGuestUser(uid):
                    return redirect_to_url(req, "%s/youraccount/login%s" % (
                        CFG_SITE_SECURE_URL,
                        make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {}))
                        , norobot=True)
                else:
                    return page_not_authorized(req, "../submit",
                                               uid=uid,
                                               navmenuid='submit')
            return home(req, catalogues_text, c, ln)
        elif act == "":
            # Doctype chosen but no action yet: show the list of actions.
            return action(req, c, ln, doctype)
        elif int(step)==0:
            # Step 0: render the submission interface pages.
            return interface(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage)
        else:
            # Later steps: run the end-of-action functions.
            return endaction(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage, step, mode)

    return _index(req, **args)

# Answer to both /submit/ and /submit
__call__ = index
## def retrieve_most_recent_attached_file(file_path):
## """
## Retrieve the latest file that has been uploaded with the
## CKEditor. This is the only way to retrieve files that the
## CKEditor has renamed after the upload.
## Eg: 'prefix/image.jpg' was uploaded but did already
## exist. CKEditor silently renamed it to 'prefix/image(1).jpg':
## >>> retrieve_most_recent_attached_file('prefix/image.jpg')
## 'prefix/image(1).jpg'
## """
## (base_path, filename) = os.path.split(file_path)
## base_name = os.path.splitext(filename)[0]
## file_ext = os.path.splitext(filename)[1][1:]
## most_recent_filename = filename
## i = 0
## while True:
## i += 1
## possible_filename = "%s(%d).%s" % \
## (base_name, i, file_ext)
## if os.path.exists(base_path + os.sep + possible_filename):
## most_recent_filename = possible_filename
## else:
## break
## return os.path.join(base_path, most_recent_filename)
diff --git a/invenio/legacy/webuser.py b/invenio/legacy/webuser.py
index 2887b26f2..a6090c9d3 100644
--- a/invenio/legacy/webuser.py
+++ b/invenio/legacy/webuser.py
@@ -1,1409 +1,1409 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This file implements all methods necessary for working with users and
sessions in Invenio. Contains methods for logging/registration
when a user log/register into the system, checking if it is a guest
user or not.
At the same time this presents all the stuff it could need with
sessions managements, working with websession.
It also contains Apache-related user authentication stuff.
"""
__revision__ = "$Id$"
import cgi
import urllib
import urlparse
import socket
import smtplib
import re
import random
import datetime
from socket import gaierror
from flask import Request
from invenio.config import \
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \
CFG_ACCESS_CONTROL_LEVEL_GUESTS, \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN, \
CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS, \
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT, \
CFG_SITE_ADMIN_EMAIL, \
CFG_SITE_LANG, \
CFG_SITE_NAME, \
CFG_SITE_NAME_INTL, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_SITE_SECURE_URL, \
CFG_SITE_URL, \
CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS, \
CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS, \
CFG_CERN_SITE, \
CFG_INSPIRE_SITE, \
CFG_BIBAUTHORID_ENABLED, \
CFG_SITE_RECORD
try:
from flask import session
except ImportError:
pass
from invenio.dbquery import run_sql, OperationalError, \
serialize_via_marshal, deserialize_via_marshal
from invenio.access_control_admin import acc_get_role_id, acc_get_action_roles, acc_get_action_id, acc_is_user_in_role, acc_find_possible_activities
from invenio.access_control_mailcookie import mail_cookie_create_mail_activation
from invenio.access_control_firerole import acc_firerole_check_user, load_role_definition
from invenio.access_control_config import SUPERADMINROLE, CFG_EXTERNAL_AUTH_USING_SSO
from invenio.base.i18n import gettext_set_language, wash_languages, wash_language
from invenio.ext.email import send_email
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webgroup_dblayer import get_groups
from invenio.external_authentication import InvenioWebAccessExternalAuthError
from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION, \
CFG_WEBACCESS_MSGS, CFG_WEBACCESS_WARNING_MSGS, CFG_EXTERNAL_AUTH_DEFAULT, \
CFG_TEMP_EMAIL_ADDRESS
from invenio.webuser_config import CFG_WEBUSER_USER_TABLES
import invenio.template
tmpl = invenio.template.load('websession')
re_invalid_nickname = re.compile(""".*[,'@]+.*""")
# pylint: disable=C0301
def createGuestUser():
    """Create a guest user, i.e. insert a 'user' row with empty email.

    The 'note' column is '1' when guests are fully allowed
    (CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0) and '0' otherwise.
    Returns the new GuestUserID, or None on database error.
    """
    if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0:
        query = "insert into user (email, note) values ('', '1')"
    else:
        query = "insert into user (email, note) values ('', '0')"
    try:
        return run_sql(query)
    except OperationalError:
        return None
def page_not_authorized(req, referer='', uid='', text='', navtrail='', ln=CFG_SITE_LANG,
                        navmenuid=""):
    """Show error message when user is not authorized to do something.

    @param referer: in case the displayed message propose a login link, this
    is the url to return to after logging in. If not specified it is guessed
    from req.
    @param uid: the uid of the user. If not specified it is guessed from req.
    @param text: the message to be displayed. If not specified it will be
    guessed from the context.
    """
    from invenio.webpage import page
    _ = gettext_set_language(ln)
    if not referer:
        referer = req.unparsed_uri
    if not CFG_ACCESS_CONTROL_LEVEL_SITE:
        # Normal site operation: tailor the message to the user's state.
        title = CFG_WEBACCESS_MSGS[5]
        if not uid:
            uid = getUid(req)
        try:
            res = run_sql("SELECT email FROM user WHERE id=%s AND note=1", (uid,))
            if res and res[0][0]:
                # Known, activated account: propose to (re-)login.
                if text:
                    body = text
                else:
                    body = "%s %s" % (CFG_WEBACCESS_WARNING_MSGS[9] % cgi.escape(res[0][0]),
                                      ("%s %s" % (CFG_WEBACCESS_MSGS[0] % urllib.quote(referer), CFG_WEBACCESS_MSGS[1])))
            else:
                # Guest or not-yet-activated account.
                if text:
                    body = text
                else:
                    if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 1:
                        body = CFG_WEBACCESS_MSGS[3]
                    else:
                        body = CFG_WEBACCESS_WARNING_MSGS[4] + CFG_WEBACCESS_MSGS[2]
        except OperationalError, e:
            body = _("Database problem") + ': ' + str(e)
    elif CFG_ACCESS_CONTROL_LEVEL_SITE == 1:
        # Read-only site mode.
        title = CFG_WEBACCESS_MSGS[8]
        body = "%s %s" % (CFG_WEBACCESS_MSGS[7], CFG_WEBACCESS_MSGS[2])
    elif CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # Permission-denied site mode.
        # NOTE(review): a level > 2 would leave title/body unbound here --
        # verify CFG_ACCESS_CONTROL_LEVEL_SITE is always in (0, 1, 2).
        title = CFG_WEBACCESS_MSGS[6]
        body = "%s %s" % (CFG_WEBACCESS_MSGS[4], CFG_WEBACCESS_MSGS[2])
    return page(title=title,
                language=ln,
                uid=getUid(req),
                body=body,
                navtrail=navtrail,
                req=req,
                navmenuid=navmenuid)
def getUid(req):
    """Return user ID taking it from the cookie of the request.
    Includes control mechanism for the guest users, inserting in
    the database table when need be, raising the cookie back to the
    client.

    User ID is set to 0 when client refuses cookie or we are in the
    read-only site operation mode.
    User ID is set to -1 when we are in the permission denied site
    operation mode.

    getUid(req) -> userId

    NOTE(review): this legacy implementation is shadowed further down in
    this module by ``getUid = lambda req: current_user.get_id()``, so this
    body is effectively dead code kept for reference.
    """
    #if hasattr(req, '_user_info'):
    #    return req._user_info['_uid']
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 1: return 0
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: return -1
    guest = 0
    from flask import session
    uid = session.uid
    if not session.need_https:
        if uid == -1: # first time, so create a guest user
            if CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
                uid = session['uid'] = createGuestUser()
                session.set_remember_me(False)
                guest = 1
            else:
                if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0:
                    session['uid'] = 0
                    session.set_remember_me(False)
                    return 0
                else:
                    return -1
        else:
            # Returning visitor: refresh the cached user_info on the request.
            if not hasattr(req, '_user_info') and 'user_info' in session:
                req._user_info = session['user_info']
                req._user_info = collect_user_info(req, refresh=True)
    if guest == 0:
        guest = isGuestUser(uid)
    if guest:
        if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0:
            return uid
        elif CFG_ACCESS_CONTROL_LEVEL_GUESTS >= 1:
            return -1
    else:
        # Registered account: honour the account activation policy.
        res = run_sql("SELECT note FROM user WHERE id=%s", (uid,))
        if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0:
            return uid
        elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1 and res and res[0][0] in [1, "1"]:
            return uid
        else:
            return -1
from invenio.ext.login import current_user, login_user, logout_user
# Override the legacy getUid() defined above: the user id now comes from
# the Flask-Login current_user proxy.
getUid = lambda req: current_user.get_id()
def setUid(req, uid, remember_me=False):
    """Store UID in the session and raise the cookie to the client.

    A positive uid logs the user in (optionally remembered across
    browser restarts); any other value logs the current user out.
    Returns uid unchanged.
    """
    if uid > 0:
        login_user(uid, remember_me)
    else:
        logout_user()
    return uid
def session_param_del(req, key):
    """Remove KEY (and its value) from the current session.

    Raises KeyError when the key is absent, like the original `del`.
    """
    session.pop(key)
def session_param_set(req, key, value):
    """
    Set a VALUE for the session param KEY for the current session.
    """
    # `req` is unused: the Flask session is a global proxy; the parameter
    # is kept for backward compatibility with legacy callers.
    session[key] = value
def session_param_get(req, key, default = None):
    """Return the value stored under KEY in the current session.

    If the key doesn't exist, return the provided default instead.
    """
    try:
        return session[key]
    except KeyError:
        return default
def session_param_list(req):
    """
    List all available session parameters.
    """
    # `req` is unused: the Flask session is a global proxy.
    return session.keys()
def get_last_login(uid):
    """Return the last_login datetime for uid if any, otherwise return the Epoch."""
    rows = run_sql('SELECT last_login FROM user WHERE id=%s', (uid,), 1)
    if not (rows and rows[0][0]):
        return datetime.datetime(1970, 1, 1)
    return rows[0][0]
def get_user_info(uid, ln=CFG_SITE_LANG):
    """Get infos for a given user.
    @param uid: user id (int)
    @return: tuple: (uid, nickname, display_name)
    """
    _ = gettext_set_language(ln)
    query = """SELECT id, nickname
               FROM user
               WHERE id=%s"""
    res = run_sql(query, (uid,))
    if not (res and res[0]):
        # Unknown uid: return placeholder values.
        return (uid, '', _("N/A"))
    found_id, nickname = res[0]
    if nickname:
        return (found_id, nickname, nickname)
    # No nickname set: fall back to the numeric id for display.
    return (found_id, str(found_id), _("user") + ' #' + str(found_id))
def get_uid_from_email(email):
    """Return the uid corresponding to an email.
    Return -1 when the email does not exists."""
    try:
        rows = run_sql("SELECT id FROM user WHERE email=%s", (email,))
    except OperationalError:
        # Database error: log it and report "not found".
        register_exception()
        return -1
    if rows:
        return rows[0][0]
    return -1
def isGuestUser(uid, run_on_slave=True):
    """Check whether UID corresponds to a guest user.

    Returns 1 when the user has no (non-empty) registered email --
    i.e. is a guest -- and 0 otherwise.  Database errors are logged
    and reported as guest.
    """
    try:
        rows = run_sql("SELECT email FROM user WHERE id=%s LIMIT 1", (uid,), 1,
                       run_on_slave=run_on_slave)
    except OperationalError:
        register_exception()
        return 1
    if rows and rows[0][0]:
        return 0
    return 1
def isUserSubmitter(user_info):
    """Return True if the user is a submitter for something; False otherwise."""
    submitter_email = get_email(user_info['uid'])
    rows = run_sql("SELECT email FROM sbmSUBMISSIONS WHERE email=%s LIMIT 1", (submitter_email,), 1)
    return len(rows) > 0
def isUserReferee(user_info):
    """Return True if the user is a referee for something; False otherwise."""
    # On CERN sites everybody is potentially a referee.
    if CFG_CERN_SITE:
        return True
    for (role_id, dummy_name, dummy_description) in acc_get_action_roles(acc_get_action_id('referee')):
        if acc_is_user_in_role(user_info, role_id):
            return True
    return False
def isUserAdmin(user_info):
    """Return True if the user has some admin rights; False otherwise."""
    activities = acc_find_possible_activities(user_info)
    return activities != {}
def isUserSuperAdmin(user_info):
    """Return True if the user is superadmin; False otherwise."""
    # First check: direct, non-expired assignment of SUPERADMINROLE
    # in the user_accROLE table.
    if run_sql("""SELECT r.id
        FROM accROLE r LEFT JOIN user_accROLE ur
        ON r.id = ur.id_accROLE
        WHERE r.name = %s AND
        ur.id_user = %s AND ur.expiration>=NOW() LIMIT 1""", (SUPERADMINROLE, user_info['uid']), 1, run_on_slave=True):
        return True
    # Fallback: evaluate the role's firewall-like (firerole) definition.
    return acc_firerole_check_user(user_info, load_role_definition(acc_get_role_id(SUPERADMINROLE)))
def nickname_valid_p(nickname):
    """Check whether wanted NICKNAME supplied by the user is valid.
    At the moment we just check whether it is not empty, does not
    contain blanks or @, is not equal to `guest', etc.

    This check relies on re_invalid_nickname regexp (see above)
    Return 1 if nickname is okay, return 0 if it is not.
    """
    if not nickname:
        return 0
    if nickname.startswith(' ') or nickname.endswith(' '):
        return 0
    if nickname.lower() == 'guest':
        return 0
    if re_invalid_nickname.match(nickname):
        return 0
    return 1
def email_valid_p(email):
    """Check whether wanted EMAIL address supplied by the user is valid.
    At the moment we just check whether it contains '@' and whether
    it doesn't contain blanks. We also check the email domain if
    CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN is set.

    Return 1 if email is okay, return 0 if it is not.
    """
    if email.find("@") <= 0:
        return 0
    if email.find(" ") > 0:
        return 0
    if CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN and \
       not email.endswith(CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN):
        return 0
    return 1
def confirm_email(email):
    """Confirm the email. It returns None when there are problems, otherwise
    it return the uid involved."""
    # Map the account-activation policy to the 'note' column value.
    if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0:
        activated = 1
    elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
        activated = 0
    elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2:
        # NOTE(review): returns -1 although the docstring promises None on
        # problems -- verify what callers expect before changing either.
        return -1
    run_sql('UPDATE user SET note=%s where email=%s', (activated, email))
    res = run_sql('SELECT id FROM user where email=%s', (email,))
    if res:
        if CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS:
            # Let the site admin know a new account was confirmed.
            send_new_admin_account_warning(email, CFG_SITE_ADMIN_EMAIL)
        return res[0][0]
    else:
        return None
def registerUser(req, email, passw, nickname, register_without_nickname=False,
                 login_method=None, ln=CFG_SITE_LANG):
    """Register user with the desired values of NICKNAME, EMAIL and
    PASSW.

    If REGISTER_WITHOUT_NICKNAME is set to True, then ignore
    desired NICKNAME and do not set any. This is suitable for
    external authentications so that people can login without
    having to register an internal account first.

    Return 0 if the registration is successful, 1 if email is not
    valid, 2 if nickname is not valid, 3 if email is already in the
    database, 4 if nickname is already in the database, 5 when
    users cannot register themselves because of the site policy, 6 when the
    site is having problem contacting the user.

    If login_method is None or is equal to the key corresponding to local
    authentication, then CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS is taken
    in account for deciding the behaviour about registering.
    """
    # is email valid?
    email = email.lower()
    if not email_valid_p(email):
        return 1
    _ = gettext_set_language(ln)
    # is email already taken?
    res = run_sql("SELECT email FROM user WHERE email=%s", (email,))
    if len(res) > 0:
        return 3
    if register_without_nickname:
        # ignore desired nick and use default empty string one:
        nickname = ""
    else:
        # is nickname valid?
        if not nickname_valid_p(nickname):
            return 2
        # is nickname already taken?
        res = run_sql("SELECT nickname FROM user WHERE nickname=%s", (nickname,))
        if len(res) > 0:
            return 4
    activated = 1 # By default activated
    if not login_method or not CFG_EXTERNAL_AUTHENTICATION[login_method]: # local login
        if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2:
            # Site policy forbids self-registration.
            return 5
        elif CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT:
            activated = 2 # Email confirmation required
        elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1:
            activated = 0 # Administrator confirmation required
        if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT:
            # Email the user an activation link with a limited lifetime.
            address_activation_key = mail_cookie_create_mail_activation(
                email,
                cookie_timeout=datetime.timedelta(
                    days=CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS
                )
            )
            try:
                ip_address = req.remote_host or req.remote_ip
            except:
                ip_address = None
            try:
                if not send_email(CFG_SITE_SUPPORT_EMAIL, email, _("Account registration at %s") % CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME),
                                  tmpl.tmpl_account_address_activation_email_body(
                                      email, address_activation_key,
                                      ip_address, ln)):
                    return 1
            except (smtplib.SMTPException, socket.error):
                # Could not contact the user.
                return 6
    # okay, go on and register the user:
    # NOTE(review): the password column stores AES_ENCRYPT(email, passw),
    # i.e. the email encrypted with the password as key -- this is the
    # legacy Invenio password scheme, not a typo.
    user_preference = get_default_user_preferences()
    uid = run_sql("INSERT INTO user (nickname, email, password, note, settings, last_login) "
                  "VALUES (%s,%s,AES_ENCRYPT(email,%s),%s,%s, NOW())",
                  (nickname, email, passw, activated, serialize_via_marshal(user_preference)))
    if activated == 1: # Ok we consider the user as logged in :-)
        setUid(req, uid)
    return 0
def updateDataUser(uid, email, nickname):
    """
    Update user data. Used when a user changed his email or password
    or nickname.
    """
    email = email.lower()
    # Never touch the reserved guest account.
    if email == 'guest':
        return 0
    if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 2:
        run_sql("update user set email=%s where id=%s", (email, uid))
    if nickname:
        if nickname != '':
            run_sql("update user set nickname=%s where id=%s", (nickname, uid))
    return 1
def updatePasswordUser(uid, password):
    """Update the password of a user."""
    # Password changes are disabled at the strictest account policy level.
    if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 3:
        return 1
    run_sql("update user set password=AES_ENCRYPT(email,%s) where id=%s", (password, uid))
    return 1
def merge_usera_into_userb(id_usera, id_userb):
    """
    Merges all the information of usera into userb.
    Deletes afterwards any reference to usera.

    The information about SQL tables is contained in the CFG_WEBUSER_USER_TABLES
    variable.
    """
    # Merge preferences first; userb's values win on conflicting keys.
    preferencea = get_user_preferences(id_usera)
    preferenceb = get_user_preferences(id_userb)
    preferencea.update(preferenceb)
    set_user_preferences(id_userb, preferencea)
    try:
        ## FIXME: for the time being, let's disable locking
        ## until we will move to InnoDB and we will have
        ## real transitions
        #for table, dummy in CFG_WEBUSER_USER_TABLES:
            #run_sql("LOCK TABLE %s WRITE" % table)
        index = 0
        table = ''
        try:
            # Re-point every per-user row to userb, then drop the
            # leftover usera rows, table by table.
            for index, (table, column) in enumerate(CFG_WEBUSER_USER_TABLES):
                run_sql("UPDATE %(table)s SET %(column)s=%%s WHERE %(column)s=%%s; DELETE FROM %(table)s WHERE %(column)s=%%s;" % {
                    'table': table,
                    'column': column
                }, (id_userb, id_usera, id_usera))
        except Exception, err:
            # Record exactly how far the (non-transactional) merge got
            # before failing, so an admin can repair the inconsistency.
            msg = "Error when merging id_user=%s into id_userb=%s for table %s: %s\n" % (id_usera, id_userb, table, err)
            msg += "users where succesfully already merged for tables: %s\n" % ', '.join([table[0] for table in CFG_WEBUSER_USER_TABLES[:index]])
            msg += "users where not succesfully already merged for tables: %s\n" % ', '.join([table[0] for table in CFG_WEBUSER_USER_TABLES[index:]])
            register_exception(alert_admin=True, prefix=msg)
            raise
    finally:
        ## FIXME: locking disabled
        #run_sql("UNLOCK TABLES")
        pass
def loginUser(req, p_un, p_pw, login_method):
    """It is a first simple version for the authentication of user. It returns the id of the user,
       for checking afterwards if the login is correct

    Returns a tuple (uid, email, password, message_code) where uid is None
    on failure and message_code 0 on success (non-zero codes map to
    specific error messages: e.g. 10 external auth failed, 11 wrong login
    method, 12 unknown login method, 14 bad credentials).
    """
    # p_un passed may be an email or a nickname:
    p_email = get_email_from_username(p_un)

    # go on with the old stuff based on p_email:
    if not login_method in CFG_EXTERNAL_AUTHENTICATION:
        return (None, p_email, p_pw, 12)

    if CFG_EXTERNAL_AUTHENTICATION[login_method]: # External Authentication
        try:
            result = CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(p_email, p_pw, req)
            if (result == (None, None) or result is None) and not login_method in ['oauth1', 'oauth2', 'openid']:
                # There is no need to call auth_user with username for
                # OAuth1, OAuth2 and OpenID authentication
                result = CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(p_un, p_pw, req) ## We try to login with either the email of the nickname
            if isinstance(result, (tuple, list)) and len(result) == 2:
                p_email, p_extid = result
            else:
                ## For backward compatibility we use the email as external
                ## identifier if it was not returned already by the plugin
                p_email, p_extid = str(result), str(result)
            if p_email:
                p_email = p_email.lower()
                if not p_extid:
                    p_extid = p_email
            elif not p_extid:
                try:
                    # OpenID and OAuth authentications have own error messages
                    return (None, p_email, p_pw, CFG_EXTERNAL_AUTHENTICATION[login_method].get_msg(req))
                except NotImplementedError:
                    return(None, p_email, p_pw, 15)
            else:
                # External login is successfull but couldn't fetch the email
                # address.
                # Generate a random placeholder email address that is not
                # yet present in the user table.
                generate_string = lambda: reduce((lambda x, y: x+y), [random.choice("qwertyuiopasdfghjklzxcvbnm1234567890") for i in range(32)])
                random_string = generate_string()
                p_email = CFG_TEMP_EMAIL_ADDRESS % random_string
                while run_sql("SELECT * FROM user WHERE email=%s", (p_email,)):
                    random_string = generate_string()
                    p_email = CFG_TEMP_EMAIL_ADDRESS % random_string
        except InvenioWebAccessExternalAuthError:
            register_exception(req=req, alert_admin=True)
            raise
        if p_email: # Authenthicated externally
            query_result = run_sql("SELECT id_user FROM userEXT WHERE id=%s and method=%s", (p_extid, login_method))
            if query_result:
                ## User was already registered with this external method.
                id_user = query_result[0][0]
                old_email = run_sql("SELECT email FROM user WHERE id=%s", (id_user,))[0][0]

                # Look if the email address matches with the template given.
                # If it matches, use the email address saved in the database.
                regexp = re.compile(CFG_TEMP_EMAIL_ADDRESS % r"\w*")
                if regexp.match(p_email):
                    p_email = old_email

                if old_email != p_email:
                    ## User has changed email of reference.
                    res = run_sql("SELECT id FROM user WHERE email=%s", (p_email,))
                    if res:
                        ## User was also registered with the other email.
                        ## We should merge the two!
                        new_id = res[0][0]
                        if new_id == id_user:
                            raise AssertionError("We should not reach this situation: new_id=%s, id_user=%s, old_email=%s, p_email=%s" % (new_id, id_user, old_email, p_email))
                        merge_usera_into_userb(id_user, new_id)
                        run_sql("DELETE FROM user WHERE id=%s", (id_user, ))
                        for row in run_sql("SELECT method FROM userEXT WHERE id_user=%s", (id_user, )):
                            ## For all known accounts of id_user not conflicting with new_id we move them to refer to new_id
                            if not run_sql("SELECT method FROM userEXT WHERE id_user=%s AND method=%s", (new_id, row[0])):
                                run_sql("UPDATE userEXT SET id_user=%s WHERE id_user=%s AND method=%s", (new_id, id_user, row[0]))
                        ## And we delete the duplicate remaining ones :-)
                        run_sql("DELETE FROM userEXT WHERE id_user=%s", (id_user, ))
                        id_user = new_id
                    else:
                        ## We just need to rename the email address of the
                        ## corresponding user. Unfortunately the local
                        ## password will be then invalid, but its unlikely
                        ## the user is using both an external and a local
                        ## account.
                        run_sql("UPDATE user SET email=%s WHERE id=%s", (p_email, id_user))
            else:
                ## User was not already registered with this external method.
                query_result = run_sql("SELECT id FROM user WHERE email=%s", (p_email, ))
                if query_result:
                    ## The user was already known with this email
                    id_user = query_result[0][0]
                    ## We fix the inconsistence in the userEXT table.
                    run_sql("INSERT INTO userEXT(id, method, id_user) VALUES(%s, %s, %s) ON DUPLICATE KEY UPDATE id=%s, method=%s, id_user=%s", (p_extid, login_method, id_user, p_extid, login_method, id_user))
                else:
                    ## First time user
                    # A random local password is set: the account will only
                    # ever be used through the external method.
                    p_pw_local = int(random.random() * 1000000)
                    p_nickname = ''
                    if CFG_EXTERNAL_AUTHENTICATION[login_method].enforce_external_nicknames:
                        try: # Let's discover the external nickname!
                            p_nickname = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_nickname(p_email, p_pw, req)
                        except (AttributeError, NotImplementedError):
                            pass
                        except:
                            register_exception(req=req, alert_admin=True)
                            raise
                    res = registerUser(req, p_email, p_pw_local, p_nickname,
                                       register_without_nickname=p_nickname == '',
                                       login_method=login_method)
                    if res == 4 or res == 2: # The nickname was already taken
                        res = registerUser(req, p_email, p_pw_local, '',
                                           register_without_nickname=True,
                                           login_method=login_method)
                        query_result = run_sql("SELECT id from user where email=%s", (p_email,))
                        id_user = query_result[0][0]
                    elif res == 0: # Everything was ok, with or without nickname.
                        query_result = run_sql("SELECT id from user where email=%s", (p_email,))
                        id_user = query_result[0][0]
                    elif res == 6: # error in contacting the user via email
                        return (None, p_email, p_pw_local, 19)
                    else:
                        return (None, p_email, p_pw_local, 13)
                    run_sql("INSERT INTO userEXT(id, method, id_user) VALUES(%s, %s, %s)", (p_extid, login_method, id_user))
            if CFG_EXTERNAL_AUTHENTICATION[login_method].enforce_external_nicknames:
                ## Let's still fetch a possibly upgraded nickname.
                try: # Let's discover the external nickname!
                    p_nickname = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_nickname(p_email, p_pw, req)
                    if nickname_valid_p(p_nickname) and nicknameUnique(p_nickname) == 0:
                        updateDataUser(id_user, p_email, p_nickname)
                except (AttributeError, NotImplementedError):
                    pass
                except:
                    register_exception(alert_admin=True)
                    raise
            try:
                groups = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_groups_membership(p_email, p_pw, req)
                # groups is a dictionary {group_name : group_description,}
                # Tag group names with the login method so that groups
                # coming from different methods never collide.
                new_groups = {}
                for key, value in groups.items():
                    new_groups[key + " [" + str(login_method) + "]"] = value
                groups = new_groups
            except (AttributeError, NotImplementedError):
                pass
            except:
                register_exception(req=req, alert_admin=True)
                return (None, p_email, p_pw, 16)
            else: # Groups synchronization
                if groups:
                    from invenio.webgroup import synchronize_external_groups
                    synchronize_external_groups(id_user, groups, login_method)

            user_prefs = get_user_preferences(id_user)
            if not CFG_EXTERNAL_AUTHENTICATION[login_method]:
                ## I.e. if the login method is not of robot type:
                if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 4:
                    # Let's prevent the user to switch login_method
                    if user_prefs.has_key("login_method") and \
                            user_prefs["login_method"] != login_method:
                        return (None, p_email, p_pw, 11)
                user_prefs["login_method"] = login_method

                # Cleaning external settings
                for key in user_prefs.keys():
                    if key.startswith('EXTERNAL_'):
                        del user_prefs[key]
                try:
                    # Importing external settings
                    new_prefs = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_preferences(p_email, p_pw, req)
                    for key, value in new_prefs.items():
                        user_prefs['EXTERNAL_' + key] = value
                except (AttributeError, NotImplementedError):
                    pass
                except InvenioWebAccessExternalAuthError:
                    register_exception(req=req, alert_admin=True)
                    return (None, p_email, p_pw, 16)

                # Storing settings
                set_user_preferences(id_user, user_prefs)
        else:
            return (None, p_un, p_pw, 10)
    else: # Internal Authenthication
        if not p_pw:
            p_pw = ''
        query_result = run_sql("SELECT id,email,note from user where email=%s and password=AES_ENCRYPT(email,%s)", (p_email, p_pw,))
        if query_result:
            #FIXME drop external groups and settings
            note = query_result[0][2]
            id_user = query_result[0][0]
            if note == '1': # Good account
                preferred_login_method = get_user_preferences(query_result[0][0])['login_method']
                p_email = query_result[0][1].lower()
                if login_method != preferred_login_method:
                    if preferred_login_method in CFG_EXTERNAL_AUTHENTICATION:
                        return (None, p_email, p_pw, 11)
            elif note == '2': # Email address need to be confirmed by user
                return (None, p_email, p_pw, 17)
            elif note == '0': # Account need to be confirmed by administrator
                return (None, p_email, p_pw, 18)
        else:
            return (None, p_email, p_pw, 14)
    # Login successful! Updating the last access time
    run_sql("UPDATE user SET last_login=NOW() WHERE email=%s", (p_email,))
    return (id_user, p_email, p_pw, 0)
def drop_external_settings(userId):
    """Drop the external (EXTERNAL_) settings of userid."""
    prefs = get_user_preferences(userId)
    # Rebuild the preference dict without any externally-imported entry.
    cleaned = dict((key, value) for key, value in prefs.items()
                   if not key.startswith('EXTERNAL_'))
    set_user_preferences(userId, cleaned)
def logoutUser(req):
    """It logout the user of the system, creating a guest user.

    Returns the uid of the resulting guest session.
    """
    if CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
        # Replace the authenticated user by a fresh dedicated guest account.
        uid = createGuestUser()
        session['uid'] = uid
        session.set_remember_me(False)
    else:
        # Shared guest: just throw the whole session away.
        uid = 0
        session.invalidate()
    # Drop the per-request user_info cache, if any.
    if hasattr(req, '_user_info'):
        del req._user_info
    return uid
def username_exists_p(username):
    """Check if USERNAME exists in the system. Username may be either
       nickname or email.

       Return 1 if it does exist, 0 if it does not.
    """
    if username == "":
        # return not exists if asked for guest users
        return 0
    # A username may match either the email or the nickname column.
    matches = run_sql("SELECT email FROM user WHERE email=%s", (username,))
    matches += run_sql("SELECT email FROM user WHERE nickname=%s", (username,))
    return 1 if matches else 0
def emailUnique(p_email):
    """Check if the email address only exists once. If yes, return userid, if not, -1
    (0 when the email is not registered at all).
    """
    rows = run_sql("select id, email from user where email=%s", (p_email,))
    if not rows:
        return 0
    if len(rows) == 1:
        return rows[0][0]
    return -1
def nicknameUnique(p_nickname):
    """Check if the nickname only exists once. If yes, return userid, if not, -1
    (0 when the nickname is not registered at all).
    """
    rows = run_sql("select id, nickname from user where nickname=%s", (p_nickname,))
    if not rows:
        return 0
    if len(rows) == 1:
        return rows[0][0]
    return -1
def update_Uid(req, p_email, remember_me=False):
    """It updates the userId of the session. It is used when a guest user is logged in succesfully in the system with a given email and password.
    As a side effect it will discover all the restricted collection to which the user has right to
    """
    rows = run_sql("select id from user where email=%s", (p_email,))
    uid = int(rows[0][0])
    setUid(req, uid, remember_me)
    return uid
def send_new_admin_account_warning(new_account_email, send_to, ln=CFG_SITE_LANG):
    """Send an email to the address given by send_to about the new account new_account_email."""
    _ = gettext_set_language(ln)
    # At access-control level 1 new accounts wait for admin activation.
    needs_activation = CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1

    sub = _("New account on") + " '%s'" % CFG_SITE_NAME
    if needs_activation:
        sub += " - " + _("PLEASE ACTIVATE")

    body = _("A new account has been created on") + " '%s'" % CFG_SITE_NAME
    if needs_activation:
        body += _(" and is awaiting activation")
    body += ":\n\n"
    body += _(" Username/Email") + ": %s\n\n" % new_account_email
    body += _("You can approve or reject this account request at") + ": %s/admin/webaccess/webaccessadmin.py/manageaccounts\n" % CFG_SITE_URL
    return send_email(CFG_SITE_SUPPORT_EMAIL, send_to, subject=sub, content=body)
def get_email(uid):
    """Return email address of the user uid. Return string 'guest' in case
       the user is not found."""
    rows = run_sql("SELECT email FROM user WHERE id=%s", (uid,), 1)
    if rows and rows[0][0]:
        return rows[0][0].lower()
    return "guest"
def get_email_from_username(username):
    """Return email address of the user corresponding to USERNAME.
    The username may be either nickname or email. Return USERNAME
    untouched if not found in the database or if found several
    matching entries.
    """
    if username == '':
        return ''
    # Search both identifying columns; a unique hit resolves the email.
    rows = run_sql("SELECT email FROM user WHERE email=%s", (username,), 1) + \
           run_sql("SELECT email FROM user WHERE nickname=%s", (username,), 1)
    if rows and len(rows) == 1:
        return rows[0][0].lower()
    return username
#def get_password(uid):
#"""Return password of the user uid. Return None in case
#the user is not found."""
#out = None
#res = run_sql("SELECT password FROM user WHERE id=%s", (uid,), 1)
#if res and res[0][0] != None:
#out = res[0][0]
#return out
def get_nickname(uid):
    """Return nickname of the user uid. Return None in case
       the user is not found."""
    rows = run_sql("SELECT nickname FROM user WHERE id=%s", (uid,), 1)
    if rows and rows[0][0]:
        return rows[0][0]
    return None
def get_nickname_or_email(uid):
    """Return nickname (preferred) or the email address of the user uid.
       Return string 'guest' in case the user is not found."""
    rows = run_sql("SELECT nickname, email FROM user WHERE id=%s", (uid,), 1)
    if rows and rows[0]:
        nickname, email = rows[0]
        if nickname:
            return nickname
        if email:
            return email.lower()
    return "guest"
def create_userinfobox_body(req, uid, language="en"):
    """Create user info box body for user UID in language LANGUAGE."""
    # Compute the referer URL used for post-login/logout redirects.
    url_referer = CFG_SITE_URL
    if req:
        base = CFG_SITE_SECURE_URL if req.is_https() else CFG_SITE_URL
        url_referer = base + req.unparsed_uri
        # Never redirect back to the logout page.
        if '/youraccount/logout' in url_referer:
            url_referer = ''
    user_info = collect_user_info(req)
    try:
        return tmpl.tmpl_create_userinfobox(ln=language,
                                            url_referer=url_referer,
                                            guest=int(user_info['guest']),
                                            username=get_nickname_or_email(uid),
                                            submitter=user_info['precached_viewsubmissions'],
                                            referee=user_info['precached_useapprove'],
                                            admin=user_info['precached_useadmin'],
                                            usebaskets=user_info['precached_usebaskets'],
                                            usemessages=user_info['precached_usemessages'],
                                            usealerts=user_info['precached_usealerts'],
                                            usegroups=user_info['precached_usegroups'],
                                            useloans=user_info['precached_useloans'],
                                            usestats=user_info['precached_usestats']
                                            )
    except OperationalError:
        # Database unavailable: degrade gracefully to an empty box.
        return ""
def create_useractivities_menu(req, uid, navmenuid, ln="en"):
    """Create user activities menu.

    @param req: request object
    @param uid: user id
    @type uid: int
    @param navmenuid: the section of the website this page belongs (search, submit, baskets, etc.)
    @type navmenuid: string
    @param ln: language
    @type ln: string
    @return: HTML menu of the user activities
    @rtype: string
    """
    # Compute the referer URL used for post-login/logout redirects.
    url_referer = CFG_SITE_URL
    if req:
        base = CFG_SITE_SECURE_URL if req.is_https() else CFG_SITE_URL
        url_referer = base + req.unparsed_uri
        # Never redirect back to the logout page.
        if '/youraccount/logout' in url_referer:
            url_referer = ''

    user_info = collect_user_info(req)
    # The user menu is highlighted on personalization pages and on all
    # "your*" pages except the account page itself.
    is_user_menu_selected = (navmenuid == 'personalize' or
                             (navmenuid.startswith('your') and
                              navmenuid != 'youraccount'))
    try:
        return tmpl.tmpl_create_useractivities_menu(
            ln=ln,
            selected=is_user_menu_selected,
            url_referer=url_referer,
            guest=int(user_info['guest']),
            username=get_nickname_or_email(uid),
            submitter=user_info['precached_viewsubmissions'],
            referee=user_info['precached_useapprove'],
            admin=user_info['precached_useadmin'],
            usebaskets=user_info['precached_usebaskets'],
            usemessages=user_info['precached_usemessages'],
            usealerts=user_info['precached_usealerts'],
            usegroups=user_info['precached_usegroups'],
            useloans=user_info['precached_useloans'],
            usestats=user_info['precached_usestats'],
            usecomments=user_info['precached_sendcomments'],
            )
    except OperationalError:
        # Database unavailable: degrade gracefully to an empty menu.
        return ""
def create_adminactivities_menu(req, uid, navmenuid, ln="en"):
    """Create admin activities menu.

    @param req: request object
    @param uid: user id
    @type uid: int
    @param navmenuid: the section of the website this page belongs (search, submit, baskets, etc.)
    @type navmenuid: string
    @param ln: language
    @type ln: string
    @return: HTML menu of the user activities
    @rtype: string
    """
    _ = gettext_set_language(ln)
    # Compute the referer URL used for post-login/logout redirects.
    if req:
        if req.is_https():
            url_referer = CFG_SITE_SECURE_URL + req.unparsed_uri
        else:
            url_referer = CFG_SITE_URL + req.unparsed_uri
        # Never redirect back to the logout page.
        if '/youraccount/logout' in url_referer:
            url_referer = ''
    else:
        url_referer = CFG_SITE_URL

    user_info = collect_user_info(req)
    activities = acc_find_possible_activities(user_info, ln)

    # For BibEdit and BibDocFile menu items, take into consideration
    # current record whenever possible.
    # BUGFIX: the original condition read "A or B and C" which, due to
    # Python operator precedence, entered this branch on non-record pages
    # whenever the Record Editor activity was present; the record-page
    # check must apply to both menu items.
    if (_("Run Record Editor") in activities or
            _("Run Document File Manager") in activities) and \
            user_info['uri'].startswith('/' + CFG_SITE_RECORD + '/'):
        try:
            # Get record ID and try to cast it to an int
            current_record_id = int(urlparse.urlparse(user_info['uri'])[2].split('/')[2])
        except (IndexError, ValueError):
            # URI does not carry a usable record id: leave links untouched.
            pass
        else:
            if _("Run Record Editor") in activities:
                activities[_("Run Record Editor")] = activities[_("Run Record Editor")] + '&amp;#state=edit&amp;recid=' + str(current_record_id)
            if _("Run Document File Manager") in activities:
                activities[_("Run Document File Manager")] = activities[_("Run Document File Manager")] + '&amp;recid=' + str(current_record_id)

    try:
        return tmpl.tmpl_create_adminactivities_menu(
            ln=ln,
            selected=navmenuid == 'admin',
            url_referer=url_referer,
            guest=int(user_info['guest']),
            username=get_nickname_or_email(uid),
            submitter=user_info['precached_viewsubmissions'],
            referee=user_info['precached_useapprove'],
            admin=user_info['precached_useadmin'],
            usebaskets=user_info['precached_usebaskets'],
            usemessages=user_info['precached_usemessages'],
            usealerts=user_info['precached_usealerts'],
            usegroups=user_info['precached_usegroups'],
            useloans=user_info['precached_useloans'],
            usestats=user_info['precached_usestats'],
            activities=activities
            )
    except OperationalError:
        # Database unavailable: degrade gracefully to an empty menu.
        return ""
def list_registered_users():
    """List all registered users.

    Returns (id, email) rows for every non-guest account.
    """
    query = "SELECT id,email FROM user where email!=''"
    return run_sql(query)
def list_users_in_role(role):
    """List all users of a given role (see table accROLE)
    @param role: role of user (string)
    @return: list of uids
    """
    rows = run_sql("""SELECT uacc.id_user
                      FROM user_accROLE uacc JOIN accROLE acc
                      ON uacc.id_accROLE=acc.id
                      WHERE acc.name=%s""",
                   (role,), run_on_slave=True)
    # Normalise DB values into plain ints; empty result -> empty list.
    return [int(row[0]) for row in rows] if rows else []
def list_users_in_roles(role_list):
    """List all users of given roles (see table accROLE)
    @param role_list: list of roles [string], or a single role name
    @return: list of uids
    """
    # Accept a single role name as a convenience (idiomatic isinstance
    # instead of the original explicit type() identity tests).
    if not isinstance(role_list, (list, tuple)):
        role_list = [role_list]
    query = """SELECT DISTINCT(uacc.id_user)
               FROM user_accROLE uacc JOIN accROLE acc
               ON uacc.id_accROLE=acc.id
               """
    query_addons = ""
    query_params = ()
    if role_list:
        query_params = tuple(role_list)
        # One bind placeholder per requested role, OR-ed together
        # (replaces the manual string-building loop).
        query_addons = " WHERE " + " OR ".join(["acc.name=%s"] * len(role_list))
    res = run_sql(query + query_addons, query_params, run_on_slave=True)
    if res:
        return [int(row[0]) for row in res]
    return []
def get_uid_based_on_pref(prefname, prefvalue):
    """get the user's UID based where his/her preference prefname has value prefvalue in preferences

    Returns the uid of the LAST matching user, or None when nobody matches.
    """
    the_uid = None
    for uid, blob in run_sql("SELECT id, settings FROM user WHERE settings is not NULL"):
        try:
            settings = deserialize_via_marshal(blob)
            if (prefname in settings) and (settings[prefname] == prefvalue):
                the_uid = uid
        except:
            # Corrupted settings blobs are simply skipped.
            pass
    return the_uid
def get_user_preferences(uid):
    """Return the preference dict stored for user `uid`, falling back to
    the site defaults when the user is unknown or the blob is corrupted."""
    rows = run_sql("SELECT id, settings FROM user WHERE id=%s", (uid,))
    if rows:
        blob = rows[0][1]
        try:
            return deserialize_via_marshal(blob)
        except:
            # Unreadable settings blob: fall through to the defaults.
            pass
    return get_default_user_preferences() # empty dict mean no preferences
def set_user_preferences(uid, pref):
    """Persist the preference dict `pref` for user `uid`."""
    assert type(pref) is dict
    serialized = serialize_via_marshal(pref)
    run_sql("UPDATE user SET settings=%s WHERE id=%s",
            (serialized, uid))
def get_default_user_preferences():
    """Return the default preference dict for a fresh user."""
    # Default to the configured external login method when it is available.
    login_method = ''
    if CFG_EXTERNAL_AUTH_DEFAULT in CFG_EXTERNAL_AUTHENTICATION:
        login_method = CFG_EXTERNAL_AUTH_DEFAULT
    return {'login_method': login_method}
def get_preferred_user_language(req):
    """Return the language to serve `req` in, preferring (in order) the
    stored user preference, the ?ln= URL argument and the Accept-Language
    header; persists a changed choice for logged-in users."""

    def _parse_accept_language(accept_language_header):
        """Extract langs info from req.headers_in['Accept-Language'] which
        should be set to something similar to:
        'fr,en-us;q=0.7,en;q=0.3'
        Returns language codes sorted by descending q-value.
        """
        weighted = {}
        for chunk in accept_language_header.split(','):
            pieces = chunk.split(';q=')
            if len(pieces) == 2:
                qval = pieces[1].replace('"', '') # Hack for Yeti robot
                try:
                    weighted[float(qval)] = pieces[0]
                except ValueError:
                    pass
            else:
                weighted[1.0] = pieces[0]
        return [weighted[q] for q in sorted(weighted, reverse=True)]

    uid = getUid(req)
    guest = isGuestUser(uid)
    new_lang = None
    preferred_lang = None

    if not guest:
        user_preferences = get_user_preferences(uid)
        preferred_lang = new_lang = user_preferences.get('language', None)

    if not new_lang:
        try:
            new_lang = wash_languages(cgi.parse_qs(req.args)['ln'])
        except (TypeError, AttributeError, KeyError):
            pass

    if not new_lang:
        try:
            new_lang = wash_languages(_parse_accept_language(req.headers_in['Accept-Language']))
        except (TypeError, AttributeError, KeyError):
            pass

    new_lang = wash_language(new_lang)
    # Remember a changed language choice for authenticated users.
    if new_lang != preferred_lang and not guest:
        user_preferences['language'] = new_lang
        set_user_preferences(uid, user_preferences)
    return new_lang
def collect_user_info(req, login_time=False, refresh=False):
    """Given the mod_python request object rec or a uid it returns a dictionary
    containing at least the keys uid, nickname, email, groups, plus any external keys in
    the user preferences (collected at login time and built by the different
    external authentication plugins) and if the mod_python request object is
    provided, also the remote_ip, remote_host, referer, agent fields.
    NOTE: if req is a mod_python request object, the user_info dictionary
    is saved into req._user_info (for caching purposes)
    setApacheUser & setUid will properly reset it.

    NOTE(review): this function now delegates entirely to the Flask-based
    user handling and RETURNS EARLY below -- everything after the second
    `return` is legacy dead code kept for reference.
    """
    if type(req) in [long, int] or req is None:
        from invenio.ext.login import UserInfo
        return UserInfo(req)
    from flask.ext.login import current_user
    return current_user._get_current_object()

    ##
    ## NOT USED ANYMORE -- dead code from here on
    ## please see invenio.ext.login
    ##

    #FIXME move EXTERNAL SSO functionality
    from invenio.search_engine import get_permitted_restricted_collections
    user_info = {
        'remote_ip' : '',
        'remote_host' : '',
        'referer' : '',
        'uri' : '',
        'agent' : '',
        'uid' : -1,
        'nickname' : '',
        'email' : '',
        'group' : [],
        'guest' : '1',
        'session' : None,
        'precached_permitted_restricted_collections' : [],
        'precached_usebaskets' : False,
        'precached_useloans' : False,
        'precached_usegroups' : False,
        'precached_usealerts' : False,
        'precached_usemessages' : False,
        'precached_viewsubmissions' : False,
        'precached_useapprove' : False,
        'precached_useadmin' : False,
        'precached_usestats' : False,
        'precached_viewclaimlink' : False,
        'precached_usepaperclaim' : False,
        'precached_usepaperattribution' : False,
        'precached_canseehiddenmarctags' : False,
        'precached_sendcomments' : False,
    }

    try:
        is_req = False
        is_flask = False
        session = None
        if not req:
            uid = -1
        elif type(req) in (type(1), type(1L)):
            ## req is infact a user identification
            uid = req
        elif type(req) is dict:
            ## req is by mistake already a user_info
            try:
                assert(req.has_key('uid'))
                assert(req.has_key('email'))
                assert(req.has_key('nickname'))
            except AssertionError:
                ## mmh... misuse of collect_user_info. Better warn the admin!
                register_exception(alert_admin=True)
            user_info.update(req)
            return user_info
        elif isinstance(req, Request):
            ## req is a Flask/Werkzeug request: read identity and request
            ## metadata from the Flask session.
            is_flask = True
            from flask import session
            uid = session.uid
            if 'user_info' in session:
                user_info = session['user_info']
                if not login_time and not refresh:
                    return user_info
            user_info['remote_ip'] = req.remote_addr
            user_info['session'] = session.sid
            user_info['remote_host'] = req.environ.get('REMOTE_HOST', '')
            user_info['referer'] = req.referrer
            user_info['uri'] = req.url or ''
            user_info['agent'] = req.user_agent or 'N/A'
        else:
            ## req is a (simulated) mod_python request.
            is_req = True
            uid = getUid(req)
            if hasattr(req, '_user_info') and not login_time:
                user_info = req._user_info
                if not refresh:
                    return req._user_info
            req._user_info = user_info
            try:
                user_info['remote_ip'] = req.remote_ip
            except gaierror:
                #FIXME: we should support IPV6 too. (hint for FireRole)
                pass
            user_info['session'] = session.sid
            user_info['remote_host'] = req.remote_host or ''
            user_info['referer'] = req.headers_in.get('Referer', '')
            user_info['uri'] = req.unparsed_uri or ''
            user_info['agent'] = req.headers_in.get('User-Agent', 'N/A')
        user_info['uid'] = uid
        user_info['nickname'] = get_nickname(uid) or ''
        user_info['email'] = get_email(uid) or ''
        user_info['group'] = []
        user_info['guest'] = str(isGuestUser(uid))

        if user_info['guest'] == '1' and CFG_INSPIRE_SITE:
            ## On INSPIRE even guests may be granted paper-attribution rights.
            usepaperattribution = False
            viewclaimlink = False
            if (CFG_BIBAUTHORID_ENABLED
                and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionviewers"))):
                usepaperattribution = True
#            if (CFG_BIBAUTHORID_ENABLED
#                and usepaperattribution
#                and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionlinkviewers"))):
#                viewclaimlink = True
            viewlink = False
            if is_req or is_flask:
                try:
                    viewlink = session['personinfo']['claim_in_process']
                except (KeyError, TypeError):
                    pass
            if (CFG_BIBAUTHORID_ENABLED
                and usepaperattribution
                and viewlink):
                viewclaimlink = True
            user_info['precached_viewclaimlink'] = viewclaimlink
            user_info['precached_usepaperattribution'] = usepaperattribution

        if user_info['guest'] == '0':
            user_info['group'] = [group[1] for group in get_groups(uid)]
            prefs = get_user_preferences(uid)
            login_method = prefs['login_method']
            ## NOTE: we fall back to default login_method if the login_method
            ## specified in the user settings does not exist (e.g. after
            ## a migration.)
            login_object = CFG_EXTERNAL_AUTHENTICATION.get(login_method, CFG_EXTERNAL_AUTHENTICATION[CFG_EXTERNAL_AUTH_DEFAULT])
            if login_object and ((datetime.datetime.now() - get_last_login(uid)).seconds > 3600):
                ## The user uses an external authentication method and it's a bit since
                ## she has not performed a login
                if not CFG_EXTERNAL_AUTH_USING_SSO or (
                    is_req and login_object.in_shibboleth(req)):
                    ## If we're using SSO we must be sure to be in HTTPS and Shibboleth handler
                    ## otherwise we can't really read anything, hence
                    ## it's better skip the synchronization
                    try:
                        groups = login_object.fetch_user_groups_membership(user_info['email'], req=req)
                        # groups is a dictionary {group_name : group_description,}
                        new_groups = {}
                        for key, value in groups.items():
                            new_groups[key + " [" + str(login_method) + "]"] = value
                        groups = new_groups
                    except (AttributeError, NotImplementedError, TypeError, InvenioWebAccessExternalAuthError):
                        pass
                    else: # Groups synchronization
                        from invenio.webgroup import synchronize_external_groups
                        synchronize_external_groups(uid, groups, login_method)
                        user_info['group'] = [group[1] for group in get_groups(uid)]
                    try:
                        # Importing external settings
                        new_prefs = login_object.fetch_user_preferences(user_info['email'], req=req)
                        for key, value in new_prefs.items():
                            prefs['EXTERNAL_' + key] = value
                    except (AttributeError, NotImplementedError, TypeError, InvenioWebAccessExternalAuthError):
                        pass
                    else:
                        set_user_preferences(uid, prefs)
                        prefs = get_user_preferences(uid)
                    run_sql('UPDATE user SET last_login=NOW() WHERE id=%s', (uid,))
            if prefs:
                for key, value in prefs.iteritems():
                    user_info[key.lower()] = value
            if login_time:
                ## Heavy computational information
                from invenio.access_control_engine import acc_authorize_action
                user_info['precached_permitted_restricted_collections'] = get_permitted_restricted_collections(user_info)
                user_info['precached_usebaskets'] = acc_authorize_action(user_info, 'usebaskets')[0] == 0
                user_info['precached_useloans'] = acc_authorize_action(user_info, 'useloans')[0] == 0
                user_info['precached_usegroups'] = acc_authorize_action(user_info, 'usegroups')[0] == 0
                user_info['precached_usealerts'] = acc_authorize_action(user_info, 'usealerts')[0] == 0
                user_info['precached_usemessages'] = acc_authorize_action(user_info, 'usemessages')[0] == 0
                user_info['precached_usestats'] = acc_authorize_action(user_info, 'runwebstatadmin')[0] == 0
                user_info['precached_viewsubmissions'] = isUserSubmitter(user_info)
                user_info['precached_useapprove'] = isUserReferee(user_info)
                user_info['precached_useadmin'] = isUserAdmin(user_info)
                user_info['precached_canseehiddenmarctags'] = acc_authorize_action(user_info, 'runbibedit')[0] == 0
                user_info['precached_sendcomments'] = acc_authorize_action(user_info, 'sendcomment', '*')[0] == 0
                usepaperclaim = False
                usepaperattribution = False
                viewclaimlink = False
                if (CFG_BIBAUTHORID_ENABLED
                    and acc_is_user_in_role(user_info, acc_get_role_id("paperclaimviewers"))):
                    usepaperclaim = True
                if (CFG_BIBAUTHORID_ENABLED
                    and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionviewers"))):
                    usepaperattribution = True
                viewlink = False
                if is_req or is_flask:
                    try:
                        viewlink = session['personinfo']['claim_in_process']
                    except (KeyError, TypeError):
                        pass
                if (CFG_BIBAUTHORID_ENABLED
                    and usepaperattribution
                    and viewlink):
                    viewclaimlink = True
#                if (CFG_BIBAUTHORID_ENABLED
#                    and ((usepaperclaim or usepaperattribution)
#                         and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionlinkviewers")))):
#                    viewclaimlink = True
                user_info['precached_viewclaimlink'] = viewclaimlink
                user_info['precached_usepaperclaim'] = usepaperclaim
                user_info['precached_usepaperattribution'] = usepaperattribution
    except Exception, e:
        register_exception()
    return user_info
diff --git a/invenio/legacy/wsgi/__init__.py b/invenio/legacy/wsgi/__init__.py
index 36474af42..4a6a71cb1 100644
--- a/invenio/legacy/wsgi/__init__.py
+++ b/invenio/legacy/wsgi/__init__.py
@@ -1,666 +1,666 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""mod_python->WSGI Framework"""
import sys
import os
import re
import cgi
import gc
import inspect
from fnmatch import fnmatch
from urlparse import urlparse, urlunparse
from wsgiref.util import FileWrapper
from invenio.webinterface_handler_wsgi_utils import table
from invenio.utils.apache import \
HTTP_STATUS_MAP, SERVER_RETURN, OK, DONE, \
HTTP_NOT_FOUND, HTTP_INTERNAL_SERVER_ERROR
from invenio.config import CFG_WEBDIR, CFG_SITE_LANG, \
CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST, CFG_DEVEL_SITE, CFG_SITE_URL, \
CFG_SITE_SECURE_URL, CFG_WEBSTYLE_REVERSE_PROXY_IPS
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.utils.datastructures import flatten_multidict
## TODO for future reimplementation of stream_file
#from invenio.bibdocfile import StreamFileException
from flask import request, after_this_request
## Magic regexp to search for usage of CFG_SITE_URL within src/href or
## any src usage of an external website
_RE_HTTPS_REPLACES = re.compile(r"\b((?:src\s*=|url\s*\()\s*[\"']?)http\://", re.I)
## Regexp to verify that the IP starts with a number (filter cases where 'unknown')
## It is faster to verify only the start (585 ns) compared with verifying
## the whole ip address - re.compile('^\d+\.\d+\.\d+\.\d+$') (1.01 µs)
_RE_IPADDRESS_START = re.compile("^\d+\.")
def _http_replace_func(match):
    ## src external_site -> CFG_SITE_SECURE_URL/sslredirect/external_site
    prefix = match.group(1)
    return '%s%s/sslredirect/' % (prefix, CFG_SITE_SECURE_URL)
_ESCAPED_CFG_SITE_URL = cgi.escape(CFG_SITE_URL, True)
_ESCAPED_CFG_SITE_SECURE_URL = cgi.escape(CFG_SITE_SECURE_URL, True)
def https_replace(html):
    """Rewrite plain-HTTP references in HTML into their HTTPS equivalents,
    routing external src/url() targets through the sslredirect handler."""
    secured = html.replace(_ESCAPED_CFG_SITE_URL, _ESCAPED_CFG_SITE_SECURE_URL)
    return _RE_HTTPS_REPLACES.sub(_http_replace_func, secured)
class InputProcessed(object):
    """
    Auxiliary class used when reading input.
    Every read access raises EOFError because the underlying wsgi.input
    stream has already been consumed.
    @see: <http://www.wsgi.org/wsgi/Specifications/handling_post_forms>.
    """
    def read(self, *args):
        raise EOFError('The wsgi.input stream has already been consumed')

    # All other read-style accesses fail in exactly the same way.
    readline = read
    readlines = read
    __iter__ = read
from werkzeug import BaseResponse, ResponseStreamMixin, \
CommonResponseDescriptorsMixin
class Response(BaseResponse, ResponseStreamMixin,
               CommonResponseDescriptorsMixin):
    """
    Full featured response object implementing :class:`ResponseStreamMixin`
    to add support for the `stream` property.
    """
class SimulatedModPythonRequest(object):
    """
    mod_python like request object.
    Minimum and cleaned implementation to make moving out of mod_python
    easy.
    @see: <http://www.modpython.org/live/current/doc-html/pyapi-mprequest.html>
    """
    def __init__(self, environ, start_response):
        ## Werkzeug response object accumulating status/headers/body.
        self.response = Response()
        self.__environ = environ
        self.__start_response = start_response
        self.__response_sent_p = False
        self.__content_type_set_p = False
        ## Output is accumulated here until flush() pushes it to the client.
        self.__buffer = ''
        self.__low_level_headers = []
        self.__filename = None
        self.__disposition_type = None
        self.__bytes_sent = 0
        self.__allowed_methods = []
        self.__cleanups = []
        self.headers_out = {'Cache-Control': None}
        #self.headers_out.update(dict(request.headers))
        ## See: <http://www.python.org/dev/peps/pep-0333/#the-write-callable>
        self.__write = None
        ## Set after a client-side write failure so that subsequent writes
        ## are silently dropped instead of being reported again.
        self.__write_error = False
        self.__errors = environ['wsgi.errors']
        self.__headers_in = table([])
        ## Set once req.form has been read; checked by
        ## get_original_wsgi_environment().
        self.__tainted = False
        self.__is_https = self.__environ.get('wsgi.url_scheme') == 'https'
        self.__replace_https = False
        ## When True, everything written is also mirrored into
        ## self.__what_was_written (see get_what_was_written()).
        self.track_writings = False
        self.__what_was_written = ""
        self.__cookies_out = {}
        self.g = {} ## global dictionary in case it's needed
        ## Expose HTTP_* environ entries as dash-separated request headers.
        for key, value in environ.iteritems():
            if key.startswith('HTTP_'):
                self.__headers_in[key[len('HTTP_'):].replace('_', '-')] = value
        if environ.get('CONTENT_LENGTH'):
            self.__headers_in['content-length'] = environ['CONTENT_LENGTH']
        if environ.get('CONTENT_TYPE'):
            self.__headers_in['content-type'] = environ['CONTENT_TYPE']

    def get_wsgi_environ(self):
        """Return the raw WSGI environ dictionary."""
        return self.__environ

    def get_post_form(self):
        """ Returns only POST form. """
        ## Reading the form consumes the WSGI input: mark request tainted.
        self.__tainted = True
        form = flatten_multidict(request.values)
        if request.files:
            form.update(request.files.to_dict())
        return form

    def get_response_sent_p(self):
        return self.__response_sent_p

    def get_low_level_headers(self):
        return self.__low_level_headers

    def get_buffer(self):
        return self.__buffer

    def write(self, string, flush=1):
        """Append *string* to the output buffer (unicode is UTF-8 encoded);
        flush immediately unless flush=0."""
        if isinstance(string, unicode):
            self.__buffer += string.encode('utf8')
        else:
            self.__buffer += string
        if flush:
            self.flush()

    def flush(self):
        """Send headers (if not yet sent) and push the buffered output to
        the client, applying the http->https rewriting when enabled."""
        self.send_http_header()
        if self.__buffer:
            self.__bytes_sent += len(self.__buffer)
            try:
                if not self.__write_error:
                    if self.__replace_https:
                        self.__write(https_replace(self.__buffer))
                    else:
                        if self.__buffer:
                            self.__write(self.__buffer)
                    if self.track_writings:
                        if self.__replace_https:
                            self.__what_was_written += https_replace(self.__buffer)
                        else:
                            self.__what_was_written += self.__buffer
            except IOError, err:
                if "failed to write data" in str(err) or "client connection closed" in str(err):
                    ## Let's just log this exception without alerting the admin:
                    register_exception(req=self)
                    self.__write_error = True ## This flag is there just
                        ## to not report later other errors to the admin.
                else:
                    raise
            self.__buffer = ''

    def set_content_type(self, content_type):
        self.__content_type_set_p = True
        self.response.content_type = content_type
        if self.__is_https:
            ## Only rewrite links inside HTML/RSS payloads.
            if content_type.startswith("text/html") or content_type.startswith("application/rss+xml"):
                self.__replace_https = True

    def get_content_type(self):
        return self.response.content_type

    def send_http_header(self):
        """Copy accumulated headers into the response and bind the
        low-level write callable."""
        for (k, v) in self.__low_level_headers:
            self.response.headers[k] = v
        for k, v in self.headers_out.iteritems():
            self.response.headers[k] = v
        self.__write = self.response.stream.write

    def get_unparsed_uri(self):
        """Return path + query string joined by '?' (mod_python style)."""
        return '?'.join([self.__environ['PATH_INFO'], self.__environ['QUERY_STRING']])

    def get_uri(self):
        return request.environ['PATH_INFO']

    def get_headers_in(self):
        return request.headers

    def get_subprocess_env(self):
        return self.__environ

    def add_common_vars(self):
        ## mod_python compatibility no-op.
        pass

    def get_args(self):
        return request.environ['QUERY_STRING']

    def get_remote_ip(self):
        """Return the client IP, honouring X-Forwarded-For only when the
        request comes through a proxy listed in
        CFG_WEBSTYLE_REVERSE_PROXY_IPS."""
        if 'X-FORWARDED-FOR' in self.__headers_in and \
           self.__headers_in.get('X-FORWARDED-SERVER', '') == \
           self.__headers_in.get('X-FORWARDED-HOST', '') == \
           urlparse(CFG_SITE_URL)[1]:
            # we are using proxy setup
            if self.__environ.get('REMOTE_ADDR') in CFG_WEBSTYLE_REVERSE_PROXY_IPS:
                # we trust this proxy
                ip_list = self.__headers_in['X-FORWARDED-FOR'].split(',')
                for ip in ip_list:
                    if _RE_IPADDRESS_START.match(ip):
                        return ip
                # no IP has the correct format, return a default IP
                return '10.0.0.10'
            else:
                # we don't trust this proxy
                register_exception(prefix="You are running in a proxy configuration, but the " + \
                                   "CFG_WEBSTYLE_REVERSE_PROXY_IPS variable does not contain " + \
                                   "the IP of your proxy, thus the remote IP addresses of your " + \
                                   "clients are not trusted. Please configure this variable.",
                                   alert_admin=True)
                return '10.0.0.11'
        return request.remote_addr

    def get_remote_host(self):
        return request.environ.get('REMOTE_HOST', # apache
                                   request.environ.get('HTTP_HOST',
                                                       '0.0.0.0')) # not found

    def get_header_only(self):
        """True for HEAD requests (headers only, no body expected)."""
        return request.environ['REQUEST_METHOD'] == 'HEAD'

    def set_status(self, status):
        self.response.status_code = status

    def get_status(self):
        return self.response.status_code

    def get_wsgi_status(self):
        """Return the full WSGI status line, e.g. '404 NOT FOUND'."""
        return '%s %s' % (self.response.status_code,
                          HTTP_STATUS_MAP.get(int(self.response.status_code),
                                              'Explanation not available'))

    def sendfile(self, path, offset=0, the_len=-1):
        """Stream the file at *path* (from *offset*, at most *the_len*
        bytes; -1 means until EOF) to the client.
        @return: total bytes sent on this request so far."""
        try:
            self.send_http_header()
            file_to_send = open(path)
            file_to_send.seek(offset)
            file_wrapper = FileWrapper(file_to_send)
            count = 0
            if the_len < 0:
                for chunk in file_wrapper:
                    count += len(chunk)
                    self.__bytes_sent += len(chunk)
                    self.__write(chunk)
            else:
                for chunk in file_wrapper:
                    if the_len >= len(chunk):
                        the_len -= len(chunk)
                        count += len(chunk)
                        self.__bytes_sent += len(chunk)
                        self.__write(chunk)
                    else:
                        ## Last (partial) chunk.
                        count += the_len
                        self.__bytes_sent += the_len
                        self.__write(chunk[:the_len])
                        break
        except IOError, err:
            if "failed to write data" in str(err) or "client connection closed" in str(err):
                ## Let's just log this exception without alerting the admin:
                register_exception(req=self)
            else:
                raise
        return self.__bytes_sent

    def set_content_length(self, content_length):
        ## None removes a previously set Content-Length header.
        if content_length is not None:
            self.response.headers['content-length'] = str(content_length)
        else:
            del self.response.headers['content-length']

    def is_https(self):
        return self.__is_https

    def get_method(self):
        return request.environ['REQUEST_METHOD']

    def get_hostname(self):
        return request.environ.get('HTTP_HOST', '')

    def set_filename(self, filename):
        """Set the Content-Disposition filename (defaults to inline)."""
        self.__filename = filename
        if self.__disposition_type is None:
            self.__disposition_type = 'inline'
        self.response.headers['content-disposition'] = '%s; filename=%s' % (self.__disposition_type, self.__filename)

    def set_encoding(self, encoding):
        ## Falsy encoding removes the Content-Encoding header.
        if encoding:
            self.response.headers['content-encoding'] = str(encoding)
        else:
            del self.response.headers['content-encoding']

    def get_bytes_sent(self):
        return self.__bytes_sent

    def log_error(self, message):
        ## Write to the wsgi.errors stream (typically the server error log).
        self.__errors.write(message.strip() + '\n')

    def get_content_type_set_p(self):
        return self.__content_type_set_p and \
               bool(self.response.headers['content-type'])

    def allow_methods(self, methods, reset=0):
        """Extend (or, with reset=1, replace) the list of allowed methods."""
        if reset:
            self.__allowed_methods = []
        self.__allowed_methods += [method.upper().strip() for method in methods]

    def get_allowed_methods(self):
        return self.__allowed_methods

    def readline(self, hint=None):
        try:
            return request.stream.readline(hint)
        except TypeError:
            ## the hint param is not part of wsgi pep, although
            ## it's great to exploit it in when reading FORM
            ## with large files, in order to avoid filling up the memory
            ## Too bad it's not there :-(
            return request.stream.readline()

    def readlines(self, hint=None):
        return request.stream.readlines(hint)

    def read(self, hint=None):
        return request.stream.read(hint)

    def register_cleanup(self, callback, data=None):
        ## Delegates to Flask: the callback runs after this request.
        ## NOTE(review): self.__cleanups is never populated here, so
        ## get_cleanups() always returns [] — confirm this is intended.
        @after_this_request
        def f(response):
            callback(data)

    def get_cleanups(self):
        return self.__cleanups

    def get_referer(self):
        return request.referrer

    def get_what_was_written(self):
        return self.__what_was_written

    def __str__(self):
        """Debug dump of the public, non-callable request attributes."""
        from pprint import pformat
        out = ""
        for key in dir(self):
            try:
                if not callable(getattr(self, key)) and not key.startswith("_SimulatedModPythonRequest") and not key.startswith('__'):
                    out += 'req.%s: %s\n' % (key, pformat(getattr(self, key)))
            except:
                pass
        return out

    def get_original_wsgi_environment(self):
        """
        Return the original WSGI environment used to initialize this request
        object.
        @return: environ, start_response
        @raise AssertionError: in case the environment has been altered, i.e.
            either the input has been consumed or something has already been
            written to the output.
        """
        assert not self.__tainted, "The original WSGI environment is tainted since at least req.write or req.form has been used."
        return self.__environ, self.__start_response

    ## mod_python-style attribute access on top of the getters/setters above.
    content_type = property(get_content_type, set_content_type)
    unparsed_uri = property(get_unparsed_uri)
    uri = property(get_uri)
    headers_in = property(get_headers_in)
    subprocess_env = property(get_subprocess_env)
    args = property(get_args)
    header_only = property(get_header_only)
    status = property(get_status, set_status)
    method = property(get_method)
    hostname = property(get_hostname)
    filename = property(fset=set_filename)
    encoding = property(fset=set_encoding)
    bytes_sent = property(get_bytes_sent)
    content_type_set_p = property(get_content_type_set_p)
    allowed_methods = property(get_allowed_methods)
    response_sent_p = property(get_response_sent_p)
    form = property(get_post_form)
    remote_ip = property(get_remote_ip)
    remote_host = property(get_remote_host)
    referer = property(get_referer)
    what_was_written = property(get_what_was_written)
def alert_admin_for_server_status_p(status, referer, status_alert_list=None):
    """
    Check the configuration variable
    CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST to see if the exception should
    be registered and the admin should be alerted.

    @param status: the HTTP status code (int or str).
    @param referer: the Referer header value of the request, if any.
    @param status_alert_list: optional iterable of fnmatch patterns such as
        "404 r" or "5*"; a trailing "r" restricts the alert to requests that
        carry a referer. Defaults to CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST
        (backward compatible: two-argument calls behave as before).
    @return: True if the admin should be alerted for this status.
    """
    if status_alert_list is None:
        status_alert_list = CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST
    status = str(status)
    for pattern in status_alert_list:
        pattern = pattern.lower()
        must_have_referer = False
        if pattern.endswith('r'):
            ## e.g. "404 r": alert only when the request had a referer.
            must_have_referer = True
            pattern = pattern[:-1].strip() ## -> "404"
        if fnmatch(status, pattern) and (not must_have_referer or referer):
            return True
    return False
def application(environ, start_response, handler=None):
    """
    Entry point for wsgi.

    Wraps the WSGI environ into a SimulatedModPythonRequest, dispatches it
    to *handler* (or the default invenio_handler) and turns SERVER_RETURN
    signals into proper HTTP responses / error pages.
    """
    ## Needed for mod_wsgi, see: <http://code.google.com/p/modwsgi/wiki/ApplicationIssues>
    req = SimulatedModPythonRequest(environ, start_response)
    #print 'Starting mod_python simulation'
    try:
        if handler is None:
            from invenio.webinterface_layout import invenio_handler
            invenio_handler(req)
        else:
            handler(req)
        req.flush()
    ## TODO for future reimplementation of stream_file
    #except StreamFileException as e:
    #    return e.value
    except SERVER_RETURN, status:
        ## Handlers signal their outcome by raising SERVER_RETURN with
        ## either a status code or a ready-made werkzeug response.
        redirection, = status.args
        from werkzeug.wrappers import BaseResponse
        if isinstance(redirection, BaseResponse):
            return redirection
        status = int(str(status))
        if status == 404:
            from werkzeug.exceptions import NotFound
            raise NotFound()
        if status not in (OK, DONE):
            req.status = status
            req.headers_out['content-type'] = 'text/html'
            admin_to_be_alerted = alert_admin_for_server_status_p(status,
                                                                  req.headers_in.get('referer'))
            if admin_to_be_alerted:
                register_exception(req=req, alert_admin=True)
            if not req.response_sent_p:
                start_response(req.get_wsgi_status(), req.get_low_level_headers(), sys.exc_info())
            ## NOTE: relies on Python 2 map() being eager; each page chunk
            ## is written out via req.write.
            map(req.write, generate_error_page(req, admin_to_be_alerted))
            req.flush()
    finally:
        ##for (callback, data) in req.get_cleanups():
        ##    callback(data)
        #if hasattr(req, '_session'):
        #    ## The session handler saves for caching a request_wrapper
        #    ## in req.
        #    ## This saves req as an attribute, creating a circular
        #    ## reference.
        #    ## Since we have have reached the end of the request handler
        #    ## we can safely drop the request_wrapper so to avoid
        #    ## memory leaks.
        #    delattr(req, '_session')
        #if hasattr(req, '_user_info'):
        #    ## For the same reason we can delete the user_info.
        #    delattr(req, '_user_info')
        ## as suggested in
        ## <http://www.python.org/doc/2.3.5/lib/module-gc.html>
        del gc.garbage[:]
    return req.response
def generate_error_page(req, admin_was_alerted=True, page_already_started=False):
    """
    Returns an iterable with the error page to be sent to the user browser.
    """
    from invenio.webpage import page
    from invenio import template
    webstyle_templates = template.load('webstyle')
    ln = req.form.get('ln', CFG_SITE_LANG)
    ## Render the error body once; wrap it in the full page layout only
    ## when the page has not already been started.
    error_body = webstyle_templates.tmpl_error_page(
        status=req.get_wsgi_status(), ln=ln,
        admin_was_alerted=admin_was_alerted)
    if page_already_started:
        return [error_body]
    return [page(title=req.get_wsgi_status(), body=error_body,
                 language=ln, req=req)]
def is_static_path(path, webdir=None):
    """
    Returns the resolved filesystem path if *path* corresponds to an
    existing file under the web document root, or None otherwise.

    @param path: the URL path (e.g. "/img/logo.png").
    @type path: string
    @param webdir: base directory of static files; defaults to CFG_WEBDIR
        (keeps the historical one-argument call working).
    @type webdir: string
    @return: the absolute path of the file, or None.
    @rtype: string or None

    Fix: the previous prefix check used a bare ``startswith(CFG_WEBDIR)``,
    which also accepted sibling directories such as ``CFG_WEBDIR + "-evil"``;
    the check is now anchored on a path-separator boundary.
    """
    if webdir is None:
        webdir = CFG_WEBDIR
    resolved = os.path.abspath(webdir + path)
    ## Accept only paths strictly inside (or equal to) the web root.
    inside_webdir = resolved == webdir or resolved.startswith(webdir + os.sep)
    if inside_webdir and os.path.isfile(resolved):
        return resolved
    return None
def is_mp_legacy_publisher_path(path, webdir=None):
    """
    Checks whether *path* corresponds to an existing Python file under the
    web document root (legacy mod_python publisher layout).

    @param path: the URL path, e.g. "/admin/handler.py/do_something".
    @type path: string
    @param webdir: base directory of web files; defaults to CFG_WEBDIR
        (keeps the historical one-argument call working).
    @type webdir: string
    @return: (module_path, handler_name) when a matching ``.py`` file is
        found; (None, None) when a ``.py`` component is present but invalid;
        None when the path contains no ``.py`` component at all.
    @rtype: tuple or None

    Fix: the previous containment check used a bare ``startswith(CFG_WEBDIR)``
    which also accepted sibling directories such as ``CFG_WEBDIR + "-evil"``;
    it is now anchored on a path-separator boundary.
    """
    if webdir is None:
        webdir = CFG_WEBDIR
    components = path.split('/')
    for index, component in enumerate(components):
        if component.endswith('.py'):
            possible_module = os.path.abspath(webdir + os.path.sep + os.path.sep.join(components[:index + 1]))
            possible_handler = '/'.join(components[index + 1:]).strip()
            if possible_handler.startswith('_'):
                ## Underscore-prefixed names are private: never expose them.
                return None, None
            if not possible_handler:
                possible_handler = 'index'
            ## The module must exist and live strictly inside the web root.
            inside_webdir = (possible_module == webdir or
                             possible_module.startswith(webdir + os.path.sep))
            if os.path.exists(possible_module) and inside_webdir:
                return (possible_module, possible_handler)
            else:
                return None, None
def mp_legacy_publisher(req, possible_module, possible_handler):
    """
    mod_python legacy publisher minimum implementation.

    Executes the Python file *possible_module* in a fresh namespace and
    calls the function named *possible_handler* in it, passing the request
    and the form values as keyword arguments.

    @raise SERVER_RETURN: HTTP_NOT_FOUND when the handler does not exist,
        is not callable, or does not accept ``req`` as first argument.
    """
    from invenio.session import get_session
    from invenio.webinterface_handler import CFG_HAS_HTTPS_SUPPORT, CFG_FULL_HTTPS
    ## NOTE(review): the module source is exec'ed on every request; the
    ## file must be trusted (it lives under CFG_WEBDIR).
    the_module = open(possible_module).read()
    module_globals = {}
    exec(the_module, module_globals)
    if possible_handler in module_globals and callable(module_globals[possible_handler]):
        from invenio.webinterface_handler import _check_result
        ## req is the required first parameter of any handler
        expected_args = list(inspect.getargspec(module_globals[possible_handler])[0])
        if not expected_args or 'req' != expected_args[0]:
            ## req was not the first argument. Too bad!
            raise SERVER_RETURN, HTTP_NOT_FOUND
        ## the req.form must be casted to dict because of Python 2.4 and earlier
        ## otherwise any object exposing the mapping interface can be
        ## used with the magic **
        form = dict()
        for key, value in req.form.items():
            ## FIXME: this is a backward compatibility workaround
            ## because most of the old administration web handler
            ## expect parameters to be of type str.
            ## When legacy publisher will be removed all this
            ## pain will go away anyway :-)
            if isinstance(value, unicode):
                form[key] = value.encode('utf8')
            else:
                ## NOTE: this is a workaround for e.g. legacy webupload
                ## that is still using legacy publisher and expect to
                ## have a file (Field) instance instead of a string.
                form[key] = value
        if (CFG_FULL_HTTPS or CFG_HAS_HTTPS_SUPPORT and get_session(req).need_https) and not req.is_https():
            from invenio.utils.url import redirect_to_url
            # We need to isolate the part of the URI that is after
            # CFG_SITE_URL, and append that to our CFG_SITE_SECURE_URL.
            original_parts = urlparse(req.unparsed_uri)
            plain_prefix_parts = urlparse(CFG_SITE_URL)
            secure_prefix_parts = urlparse(CFG_SITE_SECURE_URL)
            # Compute the new path
            plain_path = original_parts[2]
            plain_path = secure_prefix_parts[2] + \
                         plain_path[len(plain_prefix_parts[2]):]
            # ...and recompose the complete URL
            final_parts = list(secure_prefix_parts)
            final_parts[2] = plain_path
            final_parts[-3:] = original_parts[-3:]
            target = urlunparse(final_parts)
            redirect_to_url(req, target)
        try:
            return _check_result(req, module_globals[possible_handler](req, **form))
        except TypeError, err:
            ## The handler rejected our keyword arguments: retry with only
            ## the parameters it actually declares.
            if ("%s() got an unexpected keyword argument" % possible_handler) in str(err) or ('%s() takes at least' % possible_handler) in str(err):
                inspected_args = inspect.getargspec(module_globals[possible_handler])
                expected_args = list(inspected_args[0])
                expected_defaults = list(inspected_args[3])
                ## Reversed so positional args line up with their defaults.
                expected_args.reverse()
                expected_defaults.reverse()
                register_exception(req=req, prefix="Wrong GET parameter set in calling a legacy publisher handler for %s: expected_args=%s, found_args=%s" % (possible_handler, repr(expected_args), repr(req.form.keys())), alert_admin=CFG_DEVEL_SITE)
                cleaned_form = {}
                for index, arg in enumerate(expected_args):
                    if arg == 'req':
                        continue
                    if index < len(expected_defaults):
                        cleaned_form[arg] = form.get(arg, expected_defaults[index])
                    else:
                        cleaned_form[arg] = form.get(arg, None)
                return _check_result(req, module_globals[possible_handler](req, **cleaned_form))
            else:
                raise
    else:
        raise SERVER_RETURN, HTTP_NOT_FOUND
def check_wsgiref_testing_feasability():
    """
    In order to use wsgiref for running Invenio, CFG_SITE_URL and
    CFG_SITE_SECURE_URL must not use HTTPS because SSL is not supported.
    Exits the process with status 1 when either URL is https.
    """
    if CFG_SITE_URL.lower().startswith('https'):
        sys.stderr.write("""
ERROR: SSL is not supported by the wsgiref simple server implementation.
Please set CFG_SITE_URL not to start with "https".
Currently CFG_SITE_URL is set to: "%s".""" % CFG_SITE_URL + "\n")
        sys.exit(1)
    if CFG_SITE_SECURE_URL.lower().startswith('https'):
        sys.stderr.write("""
ERROR: SSL is not supported by the wsgiref simple server implementation.
Please set CFG_SITE_SECURE_URL not to start with "https".
Currently CFG_SITE_SECURE_URL is set to: "%s".""" % CFG_SITE_SECURE_URL + "\n")
        sys.exit(1)
def wsgi_handler_test(port=80):
    """
    Simple WSGI testing environment based on wsgiref.

    @param port: TCP port the development server listens on.
    """
    ## Abort early if the configured URLs use HTTPS (unsupported here).
    check_wsgiref_testing_feasability()
    from invenio.base.factory import create_app
    test_app = create_app(wsgi_serve_static_files=True)
    test_app.run(debug=True, port=port)
def main():
    """Parse command-line options and start the WSGI test server or print help."""
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-t', '--test', action='store_true',
                      dest='test', default=False,
                      help="Run a WSGI test server via wsgiref (not using Apache).")
    parser.add_option('-p', '--port', type='int', dest='port', default='80',
                      help="The port where the WSGI test server will listen. [80]")
    options, dummy_args = parser.parse_args()
    if not options.test:
        parser.print_help()
    else:
        wsgi_handler_test(options.port)
## Allow running this module directly (e.g. to start the wsgiref test server).
if __name__ == "__main__":
    main()
diff --git a/invenio/modules/access/firerole.py b/invenio/modules/access/firerole.py
index 98a805cbb..e12ae7a0d 100644
--- a/invenio/modules/access/firerole.py
+++ b/invenio/modules/access/firerole.py
@@ -1,337 +1,337 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Invenio Access Control FireRole."""
__revision__ = "$Id$"
__lastupdated__ = """$Date$"""
"""These functions are for realizing a firewall like role definition for extending
webaccess to connect user to roles using every infos about users.
"""
import re
import cPickle
from zlib import compress, decompress
import sys
import time
## Python < 2.4 compatibility: provide the built-in set type.
if sys.hexversion < 0x2040000:
    # pylint: disable=W0622
    from sets import Set as set
    # pylint: enable=W0622
from invenio.access_control_config import InvenioWebAccessFireroleError
from invenio.dbquery import run_sql, blob_to_string
from invenio.config import CFG_CERN_SITE
from invenio.access_control_config import CFG_ACC_EMPTY_ROLE_DEFINITION_SRC, \
CFG_ACC_EMPTY_ROLE_DEFINITION_SER, CFG_ACC_EMPTY_ROLE_DEFINITION_OBJ
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
# INTERFACE
def compile_role_definition(firerole_def_src):
    """ Given a text in which every row contains a rule it returns the compiled
    object definition.
    Rules have the following syntax:
    allow|deny [not] field {list of one or more (double)quoted string or regexp}
    or allow|deny any
    Every row may contain a # sign followed by a comment which are discarded.
    Field could be any key contained in a user_info dictionary. If the key does
    not exist in the dictionary, the rule is skipped.
    The first rule which matches return.

    @return: a (default_allow_p, rules) tuple where rules is a tuple of
        (allow_p, not_p, field, expressions_list) entries, consumed by
        acc_firerole_check_user().
    """
    line = 0
    ret = []
    default_allow_p = False
    ## Empty source compiles to the site-wide default definition.
    if not firerole_def_src or not firerole_def_src.strip():
        firerole_def_src = CFG_ACC_EMPTY_ROLE_DEFINITION_SRC
    for row in firerole_def_src.split('\n'):
        line += 1
        row = row.strip()
        if not row:
            continue
        ## Strip trailing comments (unescaped "#" onwards).
        clean_row = _no_comment_re.sub('', row)
        if clean_row:
            g = _any_rule_re.match(clean_row)
            if g:
                ## "allow any" / "deny any" sets the default and ends parsing.
                default_allow_p = g.group('command').lower() == 'allow'
                break
            g = _rule_re.match(clean_row)
            if g:
                allow_p = g.group('command').lower() == 'allow'
                not_p = g.group('not') != None
                field = g.group('field').lower()
                # Renaming groups to group
                for alias_item in _aliasTable:
                    if field in alias_item:
                        field = alias_item[0]
                        break
                if field.startswith('precached_'):
                    raise InvenioWebAccessFireroleError("Error while compiling rule %s (line %s): %s is a reserved key and can not be used in FireRole rules!" % (row, line, field))
                expressions = g.group('expression')+g.group('more_expressions')
                expressions_list = []
                ## Each expression becomes a (is_regexp_p, value) pair where
                ## value is a timestamp for from/until, a compiled regexp,
                ## an (ip, mask) tuple for CIDR groups, or a plain string.
                for expr in _expressions_re.finditer(expressions):
                    expr = expr.group()
                    if field in ('from', 'until'):
                        try:
                            expressions_list.append((False, time.mktime(time.strptime(expr[1:-1], '%Y-%m-%d'))))
                        except Exception, msg:
                            raise InvenioWebAccessFireroleError("Syntax error while compiling rule %s (line %s): %s is not a valid date with format YYYY-MM-DD because %s!" % (row, line, expr, msg))
                    elif expr[0] == '/':
                        try:
                            expressions_list.append((True, re.compile(expr[1:-1], re.I)))
                        except Exception, msg:
                            raise InvenioWebAccessFireroleError("Syntax error while compiling rule %s (line %s): %s is not a valid re because %s!" % (row, line, expr, msg))
                    else:
                        if field == 'remote_ip' and '/' in expr[1:-1]:
                            try:
                                expressions_list.append((False, _ip_matcher_builder(expr[1:-1])))
                            except Exception, msg:
                                raise InvenioWebAccessFireroleError("Syntax error while compiling rule %s (line %s): %s is not a valid ip group because %s!" % (row, line, expr, msg))
                        else:
                            expressions_list.append((False, expr[1:-1]))
                expressions_list = tuple(expressions_list)
                if field in ('from', 'until'):
                    ## Date rules must have exactly one, non-negated date.
                    if len(expressions_list) != 1:
                        raise InvenioWebAccessFireroleError("Error when compiling rule %s (line %s): exactly one date is expected when using 'from' or 'until', but %s were found" % (row, line, len(expressions_list)))
                    if not_p:
                        raise InvenioWebAccessFireroleError("Error when compiling rule %s (line %s): 'not' is not allowed when using 'from' or 'until'" % (row, line))
                ret.append((allow_p, not_p, field, expressions_list))
            else:
                raise InvenioWebAccessFireroleError("Syntax error while compiling rule %s (line %s): not a valid rule!" % (row, line))
    return (default_allow_p, tuple(ret))
def repair_role_definitions():
    """ Try to rebuild compiled serialized definitions from their respectives
    sources. This is needed in case Python break back compatibility.
    """
    all_roles = run_sql("SELECT id, firerole_def_src FROM accROLE")
    for role_id, definition_src in all_roles:
        recompiled = serialize(compile_role_definition(definition_src))
        run_sql("UPDATE accROLE SET firerole_def_ser=%s WHERE id=%s",
                (recompiled, role_id))
def store_role_definition(role_id, firerole_def_ser, firerole_def_src):
    """ Store a compiled serialized definition and its source in the database
    alongside the role to which it belong.
    @param role_id: the role_id
    @param firerole_def_ser: the serialized compiled definition
    @param firerole_def_src: the sources from which the definition was taken
    """
    ## Serialized and source forms are kept in sync with a single UPDATE.
    query = "UPDATE accROLE SET firerole_def_ser=%s, firerole_def_src=%s WHERE id=%s"
    run_sql(query, (firerole_def_ser, firerole_def_src, role_id))
def load_role_definition(role_id):
    """ Load the definition corresponding to a role. If the compiled definition
    is corrupted it try to repairs definitions from their sources and try again
    to return the definition.
    @param role_id:
    @return: a deserialized compiled role definition
    """
    query = "SELECT firerole_def_ser FROM accROLE WHERE id=%s"
    rows = run_sql(query, (role_id, ), 1, run_on_slave=True)
    if rows:
        try:
            return deserialize(rows[0][0])
        except Exception:
            ## Deserialization failed (e.g. after a Python upgrade):
            ## rebuild all compiled definitions and retry once.
            repair_role_definitions()
            rows = run_sql(query, (role_id, ), 1, run_on_slave=True)
            if rows:
                return deserialize(rows[0][0])
    return CFG_ACC_EMPTY_ROLE_DEFINITION_OBJ
def acc_firerole_extract_emails(firerole_def_obj):
    """
    Best effort function to extract all the possible email addresses
    authorized by the given firerole.

    @param firerole_def_obj: a compiled definition as produced by
        compile_role_definition().
    @return: a set of lower-cased email addresses.
    @raise InvenioWebAccessFireroleError: on any unexpected failure.
    """
    authorized_emails = set()
    try:
        default_allow_p, rules = firerole_def_obj
        for (allow_p, not_p, field, expressions_list) in rules: # for every rule
            if not_p:
                ## Negated rules do not authorize anybody: skip.
                continue
            if field == 'group':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        ## Regexp group expressions cannot be resolved here.
                        continue
                    if CFG_CERN_SITE and expr.endswith(' [CERN]'):
                        ## CERN egroups map to <name>@cern.ch addresses.
                        authorized_emails.add(expr[:-len(' [CERN]')].lower().strip() + '@cern.ch')
                    emails = run_sql("SELECT user.email FROM usergroup JOIN user_usergroup ON usergroup.id=user_usergroup.id_usergroup JOIN user ON user.id=user_usergroup.id_user WHERE usergroup.name=%s", (expr, ))
                    for email in emails:
                        authorized_emails.add(email[0].lower().strip())
            elif field == 'email':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    authorized_emails.add(expr.lower().strip())
            elif field == 'uid':
                for reg_p, expr in expressions_list:
                    if reg_p:
                        continue
                    email = run_sql("SELECT email FROM user WHERE id=%s", (expr, ))
                    if email:
                        authorized_emails.add(email[0][0].lower().strip())
        return authorized_emails
    except Exception, msg:
        raise InvenioWebAccessFireroleError, msg
def acc_firerole_check_user(user_info, firerole_def_obj):
    """ Given a user_info dictionary, it matches the rules inside the deserializez
    compiled definition in order to discover if the current user match the roles
    corresponding to this definition.
    @param user_info: a dict produced by collect_user_info which contains every
    info about a user
    @param firerole_def_obj: a compiled deserialized definition produced by
    compile_role_defintion
    @return: True if the user match the definition, False otherwise.
    """
    try:
        default_allow_p, rules = firerole_def_obj
        ## First-match semantics: the first rule whose expressions match
        ## decides the outcome.
        for (allow_p, not_p, field, expressions_list) in rules: # for every rule
            group_p = field == 'group' # Is it related to group?
            ip_p = field == 'remote_ip' # Is it related to Ips?
            until_p = field == 'until' # Is it related to dates?
            from_p = field == 'from' # Idem.
            next_expr_p = False # Silly flag to break 2 for cycles
            ## Rules on fields missing from user_info are skipped
            ## (except date rules, which need no user field).
            if not user_info.has_key(field) and not from_p and not until_p:
                continue
            for reg_p, expr in expressions_list: # For every element in the rule
                if group_p: # Special case: groups
                    if reg_p: # When it is a regexp
                        for group in user_info[field]: # iterate over every group
                            if expr.match(group): # if it matches
                                if not_p: # if must not match
                                    next_expr_p = True # let's skip to next expr
                                    break
                                else: # Ok!
                                    return allow_p
                        if next_expr_p:
                            break # I said: let's skip to next rule ;-)
                    elif expr.lower() in [group.lower() for group in user_info[field]]: # Simple expression then just check for expr in groups
                        if not_p: # If expr is in groups then if must not match
                            break # let's skip to next expr
                        else: # Ok!
                            return allow_p
                elif reg_p: # Not a group, then easier. If it's a regexp
                    if expr.match(user_info[field]): # if it matches
                        if not_p: # If must not match
                            break # Let's skip to next expr
                        else:
                            return allow_p # Ok!
                elif ip_p and type(expr) == type(()): # If it's just a simple expression but an IP!
                    if _ipmatch(user_info['remote_ip'], expr): # Then if Ip matches
                        if not_p: # If must not match
                            break # let's skip to next expr
                        else:
                            return allow_p # ok!
                elif until_p:
                    ## "until <date>": inside the window the rule is neutral
                    ## for allow, denying otherwise; outside it denies allow.
                    if time.time() <= expr:
                        if allow_p:
                            break
                        else:
                            return False
                    elif allow_p:
                        return False
                    else:
                        break
                elif from_p:
                    ## "from <date>": symmetric to "until".
                    if time.time() >= expr:
                        if allow_p:
                            break
                        else:
                            return False
                    elif allow_p:
                        return False
                    else:
                        break
                elif expr.lower() == str(user_info[field]).lower(): # Finally the easiest one!!
                    if not_p: # ...
                        break
                    else: # ...
                        return allow_p # ...
            if not_p and not next_expr_p: # Nothing has matched and we got not
                return allow_p # Then the whole rule matched!
    except Exception, msg:
        register_exception(alert_admin=True)
        raise InvenioWebAccessFireroleError, msg
    return default_allow_p # By default we allow ;-) it'an OpenAccess project
def serialize(firerole_def_obj):
    """ Serialize and compress a definition.

    The canonical empty definition (and any falsy value) maps to the
    pre-computed empty serialization.
    """
    if not firerole_def_obj or firerole_def_obj == CFG_ACC_EMPTY_ROLE_DEFINITION_OBJ:
        return CFG_ACC_EMPTY_ROLE_DEFINITION_SER
    return compress(cPickle.dumps(firerole_def_obj, -1))
def deserialize(firerole_def_ser):
    """ Deserialize and decompress a definition.

    Falsy input yields the canonical empty definition object.
    """
    if not firerole_def_ser:
        return CFG_ACC_EMPTY_ROLE_DEFINITION_OBJ
    return cPickle.loads(decompress(blob_to_string(firerole_def_ser)))
# IMPLEMENTATION
# Comment finder: strips an unescaped "#" and everything after it.
_no_comment_re = re.compile(r'[\s]*(?<!\\)#.*')
# Rule dissecter: captures command (allow/deny), optional "not", the field
# name, and the quoted/regexp expressions of a rule line.
_rule_re = re.compile(r'(?P<command>allow|deny)[\s]+(?:(?P<not>not)[\s]+)?(?P<field>[\w]+)[\s]+(?P<expression>(?<!\\)\'.+?(?<!\\)\'|(?<!\\)\".+?(?<!\\)\"|(?<!\\)\/.+?(?<!\\)\/)(?P<more_expressions>([\s]*,[\s]*((?<!\\)\'.+?(?<!\\)\'|(?<!\\)\".+?(?<!\\)\"|(?<!\\)\/.+?(?<!\\)\/))*)(?:[\s]*(?<!\\).*)?', re.I)
# Matches the catch-all "allow any" / "deny all" rules.
_any_rule_re = re.compile(r'(?P<command>allow|deny)[\s]+(any|all)[\s]*', re.I)
# Sub expression finder: single-quoted, double-quoted or /regexp/ literals.
_expressions_re = re.compile(r'(?<!\\)\'.+?(?<!\\)\'|(?<!\\)\".+?(?<!\\)\"|(?<!\\)\/.+?(?<!\\)\/')
def _mkip(ip):
""" Compute a numerical value for a dotted IP """
num = 0L
if '.' in ip:
for i in map(int, ip.split('.')):
num = (num << 8) + i
return num
## All-ones 32-bit mask, used when building netmasks in _ip_matcher_builder.
_full = 2L ** 32 - 1
## Field-name aliases: any name in a tuple is normalized to its first entry.
_aliasTable = (('group', 'groups'), )
def _ip_matcher_builder(group):
    """ Compile a string "ip/bitmask" (i.e. 127.0.0.0/24)
    @param group: a classical "ip/bitmask" string
    @return: a tuple containing the gip and mask in a binary version.
    @raise InvenioWebAccessFireroleError: when the network address has host
        bits set (i.e. it is inconsistent with the given mask).
    """
    gip, gmk = group.split('/')
    gip = _mkip(gip)
    gmk = int(gmk)
    ## Top gmk bits set, remaining (host) bits clear.
    mask = (_full - (2L ** (32 - gmk) - 1))
    if not (gip & mask == gip):
        raise InvenioWebAccessFireroleError, "Netmask does not match IP (%Lx %Lx)" % (gip, mask)
    return (gip, mask)
def _ipmatch(ip, ip_matcher):
    """ Check if an ip matches an ip_group.
    @param ip: the ip to check
    @param ip_matcher: a compiled ip_group produced by ip_matcher_builder
    @return: True if ip matches, False otherwise
    """
    network, netmask = ip_matcher
    return (_mkip(ip) & netmask) == network
diff --git a/invenio/modules/access/scripts/webaccessadmin.py b/invenio/modules/access/scripts/webaccessadmin.py
index dedb8378c..491e337d6 100644
--- a/invenio/modules/access/scripts/webaccessadmin.py
+++ b/invenio/modules/access/scripts/webaccessadmin.py
@@ -1,122 +1,122 @@
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
## 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__ = "$Id$"
import getopt
import sys
from invenio.base.helpers import with_app_context
def usage(exitcode=1, msg=""):
    """Print usage info (and an optional error message) to stderr and exit.

    @param exitcode: process exit status handed to sys.exit()
    @param msg: optional error message shown before the usage text
    """
    if msg:
        print >> sys.stderr, "Error: %s." % msg
        print >> sys.stderr
    print >> sys.stderr, """Usage: %s [options]
General options:
  -h, --help\t\tprint this help
  -V, --version\t\tprint version number
Authentication options:
  -u, --user=USER\tUser name needed to perform the administrative task
Option to administrate authorizations:
  -a, --add\t\tadd default authorization settings
  -c, --compile\t\tcompile firewall like role definitions (FireRole)
  -r, --reset\t\treset to default settings
  -D, --demo\t\tto be used with -a or -r in order to consider demo site authorizations
""" % sys.argv[0]
    sys.exit(exitcode)
@with_app_context()
def main():
    """Main function that analyzes command line input and calls whatever
    is appropriate.

    Parses -a/--add, -r/--reset, -c/--compile and -D/--demo (plus the
    generic -h/-V/-u options) and dispatches to the access-control admin
    helpers accordingly.
    """
    # Imports are deferred so they run inside the app context set up by
    # the decorator.
    from invenio.access_control_firerole import repair_role_definitions
    import invenio.access_control_admin as acca
    from invenio.base.globals import cfg
    from invenio.bibtask import authenticate
    from invenio.access_control_config import DEF_DEMO_USER_ROLES, \
        DEF_DEMO_ROLES, DEF_DEMO_AUTHS
    ## parse command line:
    # set user-defined options:
    options = {'user' : '', 'reset' : 0, 'compile' : 0, 'add' : 0, 'demo' : 0}
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hVu:racD",
                                   ["help", "version", "user=",
                                    "reset", "add", "compile", "demo"])
    except getopt.GetoptError, err:
        usage(1, err)
    try:
        # Collect all recognized flags into ``options``.
        for opt in opts:
            if opt[0] in ("-h", "--help"):
                usage(0)
            elif opt[0] in ("-V", "--version"):
                print __revision__
                sys.exit(0)
            elif opt[0] in ("-u", "--user"):
                options["user"] = opt[1]
            elif opt[0] in ("-r", "--reset"):
                options["reset"] = 1
            elif opt[0] in ("-a", "--add"):
                options["add"] = 1
            elif opt[0] in ("-c", "--compile"):
                options["compile"] = 1
            elif opt[0] in ("-D", "--demo"):
                options["demo"] = 1
            else:
                usage(1)
        if options['add'] or options['reset'] or options['compile']:
            # NOTE(review): authentication is commented out below, so these
            # administrative actions currently run unauthenticated.
            #if acca.acc_get_action_id('cfgwebaccess'):
            #    # Action exists hence authentication works :-)
            #    options['user'] = authenticate(options['user'],
            #        authorization_msg="WebAccess Administration",
            #        authorization_action="cfgwebaccess")
            if options['reset'] and options['demo']:
                acca.acc_reset_default_settings([cfg['CFG_SITE_ADMIN_EMAIL']], DEF_DEMO_USER_ROLES, DEF_DEMO_ROLES, DEF_DEMO_AUTHS)
                print "Reset default demo site settings."
            elif options['reset']:
                acca.acc_reset_default_settings([cfg['CFG_SITE_ADMIN_EMAIL']])
                print "Reset default settings."
            elif options['add'] and options['demo']:
                acca.acc_add_default_settings([cfg['CFG_SITE_ADMIN_EMAIL']], DEF_DEMO_USER_ROLES, DEF_DEMO_ROLES, DEF_DEMO_AUTHS)
                print "Added default demo site settings."
            elif options['add']:
                acca.acc_add_default_settings([cfg['CFG_SITE_ADMIN_EMAIL']])
                print "Added default settings."
            # --compile may be combined with --add/--reset, hence a
            # separate ``if`` rather than another ``elif``.
            if options['compile']:
                repair_role_definitions()
                print "Compiled firewall like role definitions."
        else:
            usage(1, "You must specify at least one command")
    except StandardError, e:
        from invenio.ext.logging import register_exception
        register_exception()
        # NOTE(review): this passes the exception object as the *exit code*;
        # ``usage(1, e)`` was probably intended — confirm before changing.
        usage(e)
    return
### okay, here we go:
if __name__ == '__main__':
main()
diff --git a/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_weather.py b/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_weather.py
index 5472da744..c1d5bd1af 100644
--- a/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_weather.py
+++ b/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_weather.py
@@ -1,224 +1,224 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
WebJournal widget - Display weather forecast
"""
import os
import time
import re
import socket
try:
# Try to load feedparser. Remember for later if it was installed
# or not. Note that feedparser is slow to load: if we don't load
# it in a 'global' way, it will be loaded for every call to this
# element.
global feedparser
import feedparser
feedparser_available = 1
except ImportError:
feedparser_available = 0
from invenio.config import \
CFG_CACHEDIR, \
CFG_ACCESS_CONTROL_LEVEL_SITE
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.webjournal_utils import \
parse_url_string, WEBJOURNAL_OPENER
from invenio.base.i18n import gettext_set_language
re_image_pattern = re.compile(r'<img\s*(class=["\']imageScale["\'])*?\s*src="(?P<image>\S*)"\s*/>',
re.DOTALL | re.IGNORECASE | re.VERBOSE)
yahoo_weather_rss_base_url = 'http://weather.yahooapis.com/forecastrss?w=%(location)s&u=%(degree_unit)s'
def format_element(bfo, location='782041', degree_unit='c' ,
                   display_weather_icon='false', weather_icon_only='false'):
    """
    Display the latest weather forecast from Yahoo Weather
    (See http://developer.yahoo.com/weather/)

    @param location: Yahoo location code for the forecast
    @param degree_unit: Degree unit ('f'=Fahrenheit or 'c'=Celsius)
    @param display_weather_icon: if 'true', display weather icon inside the forecasts
    @param weather_icon_only: if 'true', display only the weather icon (without text)
    """
    # Without feedparser we cannot parse the Yahoo RSS at all.
    if not feedparser_available:
        return ""
    args = parse_url_string(bfo.user_info['uri'])
    journal_name = args["journal_name"]
    # Per-journal cache file names for the raw RSS and its expiry stamp.
    cached_filename = "webjournal_widget_weather_%s.rss" % journal_name
    expire_time_filename = "webjournal_widget_weather_%s_RSS_expires" % \
                           journal_name
    out = get_widget_html(yahoo_weather_rss_base_url % \
                          {'location': location, 'degree_unit': degree_unit},
                          cached_filename,
                          expire_time_filename,
                          journal_name)
    if weather_icon_only == 'true':
        # Keep only the first <img> found in Yahoo's HTML snippet.
        try:
            out = '<img alt="" src="%s" align="bottom" />' % \
                  re_image_pattern.findall(out)[0][1]
        except:
            register_exception(req=bfo.req)
            out = ''
    elif display_weather_icon == 'false':
        # Strip all weather icons, keep the textual forecast.
        try:
            out = re.sub(re_image_pattern, "", out)
        except:
            register_exception(req=bfo.req)
            out = ''
    return out
def escape_values(bfo):
    """
    Called by BibFormat in order to check if output of this element
    should be escaped.
    """
    # This element emits raw HTML, so escaping must stay disabled.
    no_escaping = 0
    return no_escaping
def get_widget_html(yahoo_weather_rss, cached_filename, expire_time_filename, journal_name):
    """
    Weather forecast using the Yahoo! Weather service.

    We check and store the "expires" date from the RSS feed to decide when
    an update is needed. A cached copy of the feed lives in CFG_CACHEDIR
    along with a flat file holding the expiry timestamp.

    @param yahoo_weather_rss: full Yahoo forecast RSS URL
    @param cached_filename: basename of the cached RSS inside CFG_CACHEDIR
    @param expire_time_filename: basename of the expiry-stamp file inside CFG_CACHEDIR
    @param journal_name: journal whose HTML cache is consulted first
    @return: HTML snippet, or a "No information available" list item on failure
    """
    # 1st level: ready-made HTML cache.
    cached_weather_box = _get_weather_from_cache(journal_name)
    if cached_weather_box:
        return cached_weather_box
    # 2nd level: locally saved feed data; refresh from Yahoo if missing
    # or unparsable.
    try:
        cached_rss_path = os.path.join(CFG_CACHEDIR, cached_filename)
        assert(os.path.exists(cached_rss_path))
        weather_feed = feedparser.parse(cached_rss_path)
        assert(not weather_feed.bozo_exception)
    except:
        try:
            _update_feed(yahoo_weather_rss, cached_filename, expire_time_filename)
            weather_feed = feedparser.parse('%s/%s' % \
                                            (CFG_CACHEDIR, cached_filename))
        except:
            return "<ul><li><i>" + _("No information available") + "</i></li></ul>"
    now_in_gmt = time.gmtime()
    try:
        # BUGFIX: _update_feed() writes the expiry stamp inside CFG_CACHEDIR,
        # but the original read it relative to the current working directory,
        # so this always failed and the feed was re-fetched every time.
        expire_time = time.strptime(
            open(os.path.join(CFG_CACHEDIR, expire_time_filename)).read(),
            "%a, %d %b %Y %H:%M:%S %Z")
        diff = time.mktime(expire_time) - time.mktime(now_in_gmt)
    except:
        diff = -1
    if diff < 0:
        # Feed expired (or expiry unknown): refresh from Yahoo.
        try:
            _update_feed(yahoo_weather_rss, cached_filename, expire_time_filename)
            weather_feed = feedparser.parse('%s/%s' % \
                                            (CFG_CACHEDIR, cached_filename))
        except:
            return "<ul><li><i>" + _("No information available") + "</i></li></ul>"
    # Construct the HTML. Well, simply take the one provided by Yahoo..
    html = weather_feed.entries[0]['summary']
    cache_weather(html, journal_name)
    return html
def _get_weather_from_cache(journal_name):
    """
    Try to get the weather information from cache. Return False if
    cache does not exist or is stale.

    @param journal_name: journal whose weather cache is looked up
    @return: cached HTML string, or False when unusable
    """
    cache_path = os.path.abspath('%s/webjournal/%s/weather.html' % \
                                 (CFG_CACHEDIR,
                                  journal_name))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Make sure we are reading from correct directory (you
        # know, in case there are '../../' inside journal name..)
        return False
    try:
        last_update = os.path.getctime(cache_path)
    except:
        # Missing cache file (or unreadable): treat as cache miss.
        return False
    now = time.time()
    if (last_update + 15*60) < now:
        # invalidate after 15 minutes
        return False
    try:
        cached_file = open(cache_path).read()
    except:
        return False
    return cached_file
def cache_weather(html, journal_name):
    """
    Cache the weather box HTML on disk.

    The reader side (_get_weather_from_cache) invalidates entries after
    15 minutes. Caching is skipped entirely when the site runs in
    read-only/closed mode (CFG_ACCESS_CONTROL_LEVEL_SITE == 2).

    @param html: HTML snippet to store
    @param journal_name: journal the snippet belongs to
    """
    if not CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        cache_path = os.path.abspath('%s/webjournal/%s/weather.html' % \
                                     (CFG_CACHEDIR,
                                      journal_name))
        if cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
            # Do not try to cache if the journal name led us to some
            # other directory ('../../' inside journal name for
            # example)
            cache_dir = CFG_CACHEDIR + '/webjournal/' + journal_name
            if not os.path.isdir(cache_dir):
                os.makedirs(cache_dir)
            cache_file = file(cache_path, "w")
            cache_file.write(html)
            cache_file.close()
def _update_feed(yahoo_weather_rss, cached_filename, expire_time_filename):
    """
    Retrieve the latest weather information from Yahoo and write it to
    'cached_filename'. Also write the supposed expiration date
    provided by Yahoo to 'expire_time_filename' (both inside CFG_CACHEDIR).
    """
    # Temporarily shorten the socket timeout so a slow Yahoo endpoint
    # cannot stall page rendering; always restore the previous value.
    default_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(2.0)
    try:
        try:
            feed = WEBJOURNAL_OPENER.open(yahoo_weather_rss)
        except:
            # Best-effort: on any network error keep the old cache as-is.
            return
    finally:
        socket.setdefaulttimeout(default_timeout)
    cached_file = open('%s/%s' % (CFG_CACHEDIR, cached_filename), 'w')
    cached_file.write(feed.read())
    cached_file.close()
    # NOTE(review): this second parse() hits the URL again (a second
    # download) just to read the HTTP 'Expires' header — confirm whether
    # parsing the freshly written local file was intended instead.
    feed_data = feedparser.parse(yahoo_weather_rss)
    expire_time = feed_data.headers['expires']
    expire_file = open('%s/%s' % (CFG_CACHEDIR, expire_time_filename), 'w')
    expire_file.write(expire_time)
    expire_file.close()
_ = gettext_set_language('en')
dummy = _("Under the CERN sky")
diff --git a/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_whatsNew.py b/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_whatsNew.py
index 849592bfb..5b0ab28bf 100644
--- a/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_whatsNew.py
+++ b/invenio/modules/bulletin/format_elements/bfe_webjournal_widget_whatsNew.py
@@ -1,299 +1,299 @@
# -*- coding: utf-8 -*-
## $Id: bfe_webjournal_widget_whatsNew.py,v 1.24 2009/01/27 07:25:12 jerome Exp $
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
WebJournal widget - Display the index of the lastest articles,
including 'breaking news'.
"""
import time
import os
from invenio.search_engine import search_pattern, record_exists
from invenio.modules.formatter.engine import BibFormatObject
from invenio.config import \
CFG_SITE_URL, \
CFG_CACHEDIR, \
CFG_ACCESS_CONTROL_LEVEL_SITE
from invenio.webjournal_utils import \
parse_url_string, \
make_journal_url, \
get_journal_info_path, \
get_journal_categories, \
get_journal_articles, \
get_current_issue
from invenio.base.i18n import gettext_set_language
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
def format_element(bfo, latest_issue_only='yes', newest_articles_only='yes',
                   link_category_headers='yes', display_categories='', hide_when_only_new_records="no"):
    """
    Display the index to the newest articles (of the latest issue, or of the displayed issue)

    @param latest_issue_only: if 'yes', always display articles of the latest issue, even if viewing a past issue
    @param newest_articles_only: only display new articles, not those that also appeared in previous issues
    @param link_category_headers: if yes, category headers link to index page of that category
    @param display_categories: comma-separated list of categories to display. If none, display all
    @param hide_when_only_new_records: if 'yes' display new articles only if old articles exist in this issue
    @return: HTML for the "what's new" box (possibly served from cache)
    """
    args = parse_url_string(bfo.user_info['uri'])
    journal_name = args["journal_name"]
    ln = bfo.lang
    _ = gettext_set_language(ln)
    if latest_issue_only.lower() == 'yes':
        issue_number = get_current_issue(bfo.lang, journal_name)
    else:
        issue_number = args["issue"]
    # Try to get HTML from cache (only when not in verbose/debug mode)
    if args['verbose'] == 0:
        cached_html = _get_whatsNew_from_cache(journal_name, issue_number, ln)
        if cached_html:
            return cached_html
    # No cache? Build from scratch
    # 1. Get the articles
    journal_categories = get_journal_categories(journal_name,
                                                issue_number)
    if display_categories:
        # Restrict to the explicitly requested categories (case-insensitive).
        display_categories = display_categories.lower().split(',')
        journal_categories = [category for category in journal_categories \
                              if category.lower() in display_categories]
    whats_new_articles = {}
    for category in journal_categories:
        whats_new_articles[category] = get_journal_articles(journal_name,
                                                            issue_number,
                                                            category,
                                                            newest_only=newest_articles_only.lower() == 'yes')
    # Do we want to display new articles only if they have been added
    # to an issue that contains non-new records?
    if hide_when_only_new_records.lower() == "yes":
        # First gather all articles in this issue
        all_whats_new_articles = {}
        for category in journal_categories:
            all_whats_new_articles[category] = get_journal_articles(journal_name,
                                                                    issue_number,
                                                                    category,
                                                                    newest_first=True,
                                                                    newest_only=False)
        # Then check if we have some articles at position > -1
        # (order > -1 marks articles carried over from previous issues)
        has_old_articles = False
        for articles in all_whats_new_articles.values():
            if len([order for order in articles.keys() if order > -1]) > 0:
                has_old_articles = True
                break
        if not has_old_articles:
            # We don't have old articles? Then don't consider any
            for category in journal_categories:
                whats_new_articles[category] = {}
    # 2. Build the HTML
    html_out = _get_breaking_news(ln, journal_name)
    for category in journal_categories:
        articles_in_category = whats_new_articles[category]
        html_articles_in_category = ""
        # Generate the list of articles in this category, ordered by
        # their position number.
        order_numbers = articles_in_category.keys()
        order_numbers.sort()
        for order in order_numbers:
            articles = articles_in_category[order]
            for recid in articles:
                link = make_journal_url(bfo.user_info['uri'], {'journal_name': journal_name,
                                                               'issue_number': issue_number.split('/')[0],
                                                               'issue_year': issue_number.split('/')[1],
                                                               'category': category,
                                                               'recid': recid,
                                                               'ln': bfo.lang})
                temp_rec = BibFormatObject(recid)
                # Prefer the French title variant (246_1a) for 'fr',
                # falling back to the main title (245__a).
                if ln == 'fr':
                    try:
                        title = temp_rec.fields('246_1a')[0]
                    except:
                        try:
                            title = temp_rec.field('245__a')
                        except:
                            continue
                else:
                    try:
                        title = temp_rec.field('245__a')
                    except:
                        continue
                try:
                    html_articles_in_category += '<li><a href="%s">%s</a></li>' % \
                                                 (link, title)
                except:
                    pass
        if html_articles_in_category:
            # Good, we found some new articles for this category.
            # Then insert the generated results into a larger list
            # with category as "parent".
            html_out += '<li>'
            if link_category_headers.lower() == 'yes':
                html_out += '<a href="'
                html_out += make_journal_url(bfo.user_info['uri'],
                                             {'journal_name': journal_name,
                                              'issue_number': issue_number.split('/')[0],
                                              'issue_year': issue_number.split('/')[1],
                                              'category': category,
                                              'recid': '',
                                              'ln': bfo.lang})
                html_out += '" class="whatsNewCategory">%s</a>' % _(category)
            else:
                html_out += '<span class="whatsNewCategory">%s</span>' % _(category)
            html_out += '<ul class="whatsNewItem">'
            html_out += html_articles_in_category
            html_out += '</ul></li>'
    if not html_out:
        html_out = '<i>' + _('There are no new articles for the moment') + '</i>'
    else:
        html_out = '<ul class="whatsNew">' + html_out + '</ul>'
    if args['verbose'] == 0:
        cache_whatsNew(html_out, journal_name, issue_number, ln)
    return html_out
def _get_breaking_news(lang, journal_name):
    """
    Gets the 'Breaking News' articles that are currently active according to
    start (925__a) and end (925__b) dates.

    @param lang: language code ('fr' selects the 246_1a title variant)
    @param journal_name: only 'cernbulletin' produces output
    @return: HTML snippet (possibly empty string)
    """
    # CERN Bulletin only
    if not journal_name.lower() == 'cernbulletin':
        return ''
    # Look for active breaking news
    breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \
                            if record_exists(recid) == 1]
    today = time.mktime(time.localtime())
    breaking_news = ""
    for recid in breaking_news_recids:
        temp_rec = BibFormatObject(recid)
        try:
            end_date = time.mktime(time.strptime(temp_rec.field("925__b"),
                                                 "%m/%d/%Y"))
        except:
            # Unparsable/missing end date defaults to the past -> skipped below.
            end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y"))
        if end_date < today:
            continue
        try:
            start_date = time.mktime(time.strptime(temp_rec.field("925__a"),
                                                   "%m/%d/%Y"))
        except:
            # Unparsable/missing start date defaults to the future -> skipped below.
            start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y"))
        if start_date > today:
            continue
        publish_date = temp_rec.field("269__c")
        if lang == 'fr':
            title = temp_rec.field("246_1a")
        else:
            title = temp_rec.field("245__a")
        breaking_news += '''
<h2 class="%s">%s<br/>
<strong>
<a href="%s/journal/popup?name=%s&amp;type=breaking_news&amp;record=%s&amp;ln=%s" target="_blank">%s</a>
</strong>
</h2>
''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title)
    if breaking_news:
        breaking_news = '<li>%s</li>' % breaking_news
    return breaking_news
def _get_whatsNew_from_cache(journal_name, issue, ln):
    """
    Try to get the "whats new" box from cache. Return False when it is
    missing, stale (older than 30 minutes), or older than the last
    journal release.

    @param journal_name: journal whose cache is looked up
    @param issue: issue in "number/year" form
    @param ln: language code (caches are per-language)
    """
    issue = issue.replace("/", "_")
    issue_number, year = issue.split("_", 1)
    cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' % \
                                 (CFG_CACHEDIR,
                                  journal_name,
                                  year, issue_number,
                                  ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Make sure we are reading from correct directory (you
        # know, in case there are '../../' inside journal name..)
        return False
    try:
        last_update = os.path.getctime(cache_path)
    except:
        return False
    try:
        # Get last journal update, based on journal info file last
        # modification time
        journal_info_path = get_journal_info_path(journal_name)
        last_journal_update = os.path.getctime(journal_info_path)
    except:
        return False
    now = time.time()
    if ((last_update + 30*60) < now) or \
       (last_journal_update > last_update):
        # invalidate after 30 minutes or if last journal release is
        # newer than cache
        return False
    try:
        cached_file = open(cache_path).read()
    except:
        return False
    return cached_file
def cache_whatsNew(html, journal_name, issue, ln):
    """
    Caches the whats new box for 30 minutes.

    Caching is skipped when the site runs in read-only/closed mode
    (CFG_ACCESS_CONTROL_LEVEL_SITE == 2). Failures to write are logged,
    never raised.
    """
    if not CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        issue = issue.replace("/", "_")
        issue_number, year = issue.split("_", 1)
        cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' % \
                                     (CFG_CACHEDIR,
                                      journal_name,
                                      year, issue_number,
                                      ln))
        if cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
            # Do not try to cache if the journal name led us to some
            # other directory ('../../' inside journal name for
            # example)
            try:
                cache_dir = os.path.dirname(cache_path)
                if not os.path.isdir(cache_dir):
                    os.makedirs(cache_dir)
                cache_file = file(cache_path, "w")
                cache_file.write(html)
                cache_file.close()
            except Exception:
                register_exception(req=None,
                                   prefix="Could not store 'Whats new' section",
                                   alert_admin=True)
def escape_values(bfo):
    """
    Called by BibFormat in order to check if output of this element
    should be escaped.
    """
    # Raw HTML is produced by this element, so report "no escaping".
    return int(False)
_ = gettext_set_language('en')
dummy = _("What's new")
diff --git a/invenio/modules/comments/api.py b/invenio/modules/comments/api.py
index ca78a0845..b4977c3d5 100644
--- a/invenio/modules/comments/api.py
+++ b/invenio/modules/comments/api.py
@@ -1,2180 +1,2180 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" Comments and reviews for records """
__revision__ = "$Id$"
# non Invenio imports:
import time
import math
import os
import cgi
import re
from datetime import datetime, timedelta
# Invenio imports:
from invenio.dbquery import run_sql
from invenio.config import CFG_PREFIX, \
CFG_SITE_LANG, \
CFG_WEBALERT_ALERT_ENGINE_EMAIL,\
CFG_SITE_SUPPORT_EMAIL,\
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL,\
CFG_SITE_URL,\
CFG_SITE_NAME,\
CFG_WEBCOMMENT_ALLOW_REVIEWS,\
CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS,\
CFG_WEBCOMMENT_ALLOW_COMMENTS,\
CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL,\
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN,\
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS,\
CFG_WEBCOMMENT_DEFAULT_MODERATOR, \
CFG_SITE_RECORD, \
CFG_WEBCOMMENT_EMAIL_REPLIES_TO, \
CFG_WEBCOMMENT_ROUND_DATAFIELD, \
CFG_WEBCOMMENT_RESTRICTION_DATAFIELD, \
CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH
from invenio.webmessage_mailutils import \
email_quote_txt, \
email_quoted_txt2html
from invenio.htmlutils import tidy_html
from invenio.webuser import get_user_info, get_email, collect_user_info
from invenio.utils.date import convert_datetext_to_dategui, \
datetext_default, \
convert_datestruct_to_datetext
from invenio.ext.email import send_email
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.base.i18n import wash_language, gettext_set_language
from invenio.utils.url import wash_url_argument
from .config import CFG_WEBCOMMENT_ACTION_CODE, \
InvenioWebCommentError, \
InvenioWebCommentWarning
from invenio.access_control_engine import acc_authorize_action
from invenio.search_engine import \
guess_primary_collection_of_a_record, \
check_user_can_view_record, \
get_collection_reclist, \
get_colID
from invenio.search_engine_utils import get_fieldvalues
from invenio.utils.htmlwasher import EmailWasher
try:
import invenio.template
webcomment_templates = invenio.template.load('webcomment')
except:
pass
def perform_request_display_comments_or_remarks(req, recID, display_order='od', display_since='all', nb_per_page=100, page=1, ln=CFG_SITE_LANG, voted=-1, reported=-1, subscribed=0, reviews=0, uid=-1, can_send_comments=False, can_attach_files=False, user_is_subscribed_to_discussion=False, user_can_unsubscribe_from_discussion=False, display_comment_rounds=None):
    """
    Returns all the comments (reviews) of a specific internal record or external basket record.

    @param recID: record id where (internal record IDs > 0) or (external basket record IDs < -100)
    @param display_order: hh = highest helpful score, review only
                          lh = lowest helpful score, review only
                          hs = highest star score, review only
                          ls = lowest star score, review only
                          od = oldest date
                          nd = newest date
    @param display_since: all= no filtering by date
                          nd = n days ago
                          nw = n weeks ago
                          nm = n months ago
                          ny = n years ago
                          where n is a single digit integer between 0 and 9
    @param nb_per_page: number of results per page
    @param page: results page
    @param voted: boolean, active if user voted for a review, see perform_request_vote function
    @param reported: boolean, active if user reported a certain comment/review, perform_request_report function
    @param subscribed: int, 1 if user just subscribed to discussion, -1 if unsubscribed
    @param reviews: boolean, enabled if reviews, disabled for comments
    @param uid: the id of the user who is reading comments
    @param can_send_comments: if user can send comment or not
    @param can_attach_files: if user can attach file to comment or not
    @param user_is_subscribed_to_discussion: True if user already receives new comments by email
    @param user_can_unsubscribe_from_discussion: True is user is allowed to unsubscribe from discussion
    @return html body.
    """
    _ = gettext_set_language(ln)
    warnings = []
    nb_reviews = 0
    nb_comments = 0
    # wash arguments
    recID = wash_url_argument(recID, 'int')
    ln = wash_language(ln)
    display_order = wash_url_argument(display_order, 'str')
    display_since = wash_url_argument(display_since, 'str')
    nb_per_page = wash_url_argument(nb_per_page, 'int')
    page = wash_url_argument(page, 'int')
    voted = wash_url_argument(voted, 'int')
    reported = wash_url_argument(reported, 'int')
    reviews = wash_url_argument(reviews, 'int')
    # vital argument check
    (valid, error_body) = check_recID_is_in_range(recID, warnings, ln)
    if not(valid):
        return error_body
    # CERN hack begins: filter out ATLAS comments
    from invenio.config import CFG_CERN_SITE
    if CFG_CERN_SITE:
        restricted_comments_p = False
        for report_number in get_fieldvalues(recID, '088__a'):
            if report_number.startswith("ATL-"):
                restricted_comments_p = True
                break
        if restricted_comments_p:
            err_code, err_msg = acc_authorize_action(uid, 'viewrestrcoll',
                                                     collection='ATLAS Communications')
            if err_code:
                return err_msg
    # CERN hack ends
    # Query the database and filter results
    user_info = collect_user_info(uid)
    res = query_retrieve_comments_or_remarks(recID, display_order, display_since, reviews, user_info=user_info)
    # res2 = query_retrieve_comments_or_remarks(recID, display_order, display_since, not reviews, user_info=user_info)
    nb_res = len(res)
    from invenio.webcommentadminlib import get_nb_reviews, get_nb_comments
    nb_reviews = get_nb_reviews(recID, count_deleted=False)
    nb_comments = get_nb_comments(recID, count_deleted=False)
    # checking non vital arguments - will be set to default if wrong.
    # Each bad value is reported via a raised-and-caught warning so that
    # register_exception() records a traceback and the message reaches
    # the template through ``warnings``.
    #if page <= 0 or page.lower() != 'all':
    if page < 0:
        page = 1
        try:
            raise InvenioWebCommentWarning(_('Bad page number --> showing first page.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, ''))
        #warnings.append(('WRN_WEBCOMMENT_INVALID_PAGE_NB',))
    if nb_per_page < 0:
        nb_per_page = 100
        try:
            raise InvenioWebCommentWarning(_('Bad number of results per page --> showing 10 results per page.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, ''))
        #warnings.append(('WRN_WEBCOMMENT_INVALID_NB_RESULTS_PER_PAGE',))
    if CFG_WEBCOMMENT_ALLOW_REVIEWS and reviews:
        if display_order not in ['od', 'nd', 'hh', 'lh', 'hs', 'ls']:
            display_order = 'hh'
            try:
                raise InvenioWebCommentWarning(_('Bad display order --> showing most helpful first.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, ''))
            #warnings.append(('WRN_WEBCOMMENT_INVALID_REVIEW_DISPLAY_ORDER',))
    else:
        if display_order not in ['od', 'nd']:
            display_order = 'od'
            try:
                raise InvenioWebCommentWarning(_('Bad display order --> showing oldest first.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, ''))
            #warnings.append(('WRN_WEBCOMMENT_INVALID_DISPLAY_ORDER',))
    if not display_comment_rounds:
        display_comment_rounds = []
    # filter results according to page and number of results per page
    if nb_per_page > 0:
        if nb_res > 0:
            last_page = int(math.ceil(nb_res / float(nb_per_page)))
        else:
            last_page = 1
        if page > last_page:
            page = 1
            try:
                raise InvenioWebCommentWarning(_('Bad page number --> showing first page.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, ''))
            #warnings.append(("WRN_WEBCOMMENT_INVALID_PAGE_NB",))
        if nb_res > nb_per_page: # if more than one page of results
            if page < last_page:
                res = res[(page-1)*(nb_per_page) : (page*nb_per_page)]
            else:
                res = res[(page-1)*(nb_per_page) : ]
        else: # one page of results
            pass
    else:
        last_page = 1
    # Add information regarding visibility of comment for user:
    # append a "collapsed?" boolean to each row (comment id sits at a
    # different column index for reviews vs plain comments).
    user_collapsed_comments = get_user_collapsed_comments_for_record(uid, recID)
    if reviews:
        res = [row[:] + (row[10] in user_collapsed_comments,) for row in res]
    else:
        res = [row[:] + (row[6] in user_collapsed_comments,) for row in res]
    # Send to template
    avg_score = 0.0
    if not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS: # comments not allowed by admin
        try:
            raise InvenioWebCommentError(_('Comments on records have been disallowed by the administrator.'))
        except InvenioWebCommentError, exc:
            register_exception(req=req)
            body = webcomment_templates.tmpl_error(exc.message, ln)
            return body
        # errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))
    # Feedback messages for a just-performed "report abuse" action.
    if reported > 0:
        try:
            raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, 'green'))
        #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED',))
    elif reported == 0:
        try:
            raise InvenioWebCommentWarning(_('You have already reported an abuse for this comment.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, ''))
        #warnings.append(('WRN_WEBCOMMENT_ALREADY_REPORTED',))
    elif reported == -2:
        try:
            raise InvenioWebCommentWarning(_('The comment you have reported no longer exists.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, ''))
        #warnings.append(('WRN_WEBCOMMENT_INVALID_REPORT',))
    # Review-specific: average star score plus vote feedback messages.
    if CFG_WEBCOMMENT_ALLOW_REVIEWS and reviews:
        avg_score = calculate_avg_score(res)
        if voted > 0:
            try:
                raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, 'green'))
            #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED',))
        elif voted == 0:
            try:
                raise InvenioWebCommentWarning(_('Sorry, you have already voted. This vote has not been recorded.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, ''))
            #warnings.append(('WRN_WEBCOMMENT_ALREADY_VOTED',))
    # Feedback messages for a just-performed (un)subscribe action.
    if subscribed == 1:
        try:
            raise InvenioWebCommentWarning(_('You have been subscribed to this discussion. From now on, you will receive an email whenever a new comment is posted.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, 'green'))
        #warnings.append(('WRN_WEBCOMMENT_SUBSCRIBED',))
    elif subscribed == -1:
        try:
            raise InvenioWebCommentWarning(_('You have been unsubscribed from this discussion.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, 'green'))
        #warnings.append(('WRN_WEBCOMMENT_UNSUBSCRIBED',))
    grouped_comments = group_comments_by_round(res, reviews)
    # Clean list of comments round names
    if not display_comment_rounds:
        display_comment_rounds = []
    elif 'all' in display_comment_rounds:
        display_comment_rounds = [cmtgrp[0] for cmtgrp in grouped_comments]
    elif 'latest' in display_comment_rounds:
        if grouped_comments:
            display_comment_rounds.append(grouped_comments[-1][0])
        display_comment_rounds.remove('latest')
    body = webcomment_templates.tmpl_get_comments(req,
                                                  recID,
                                                  ln,
                                                  nb_per_page, page, last_page,
                                                  display_order, display_since,
                                                  CFG_WEBCOMMENT_ALLOW_REVIEWS,
                                                  grouped_comments, nb_comments, avg_score,
                                                  warnings,
                                                  border=0,
                                                  reviews=reviews,
                                                  total_nb_reviews=nb_reviews,
                                                  uid=uid,
                                                  can_send_comments=can_send_comments,
                                                  can_attach_files=can_attach_files,
                                                  user_is_subscribed_to_discussion=\
                                                  user_is_subscribed_to_discussion,
                                                  user_can_unsubscribe_from_discussion=\
                                                  user_can_unsubscribe_from_discussion,
                                                  display_comment_rounds=display_comment_rounds)
    return body
def perform_request_vote(cmt_id, client_ip_address, value, uid=-1):
    """
    Record a positive or negative vote on a comment/review.

    @param cmt_id: review id
    @param value: +1 for voting positively
                  -1 for voting negatively
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: integer 1 if successful, integer 0 if not
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    value = wash_url_argument(value, 'int')
    uid = wash_url_argument(uid, 'int')
    # Guard clauses: bad id, bad value, or a double vote all fail early.
    if cmt_id <= 0 or value not in (-1, 1):
        return 0
    if not check_user_can_vote(cmt_id, client_ip_address, uid):
        return 0
    # Log the vote in the action history before updating the counters.
    run_sql("""INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT,
                 id_bibrec, id_user, client_host, action_time,
                 action_code)
               VALUES (%s, NULL ,%s, inet_aton(%s), %s, %s)""",
            (cmt_id, uid, client_ip_address,
             convert_datestruct_to_datetext(time.localtime()),
             CFG_WEBCOMMENT_ACTION_CODE['VOTE']))
    return query_record_useful_review(cmt_id, value)
def check_user_can_comment(recID, client_ip_address, uid=-1):
    """
    Check that the user has not already commented within the last
    CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS seconds.

    @param recID: record id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True when no recent comment by this user/host is found
    """
    recID = wash_url_argument(recID, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    # Oldest action time that still counts as "recent".
    threshold = time.time() - CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS
    threshold = convert_datestruct_to_datetext(time.localtime(threshold))
    query = """SELECT id_bibrec
               FROM cmtACTIONHISTORY
               WHERE id_bibrec=%s AND
                     action_code=%s AND
                     action_time>%s
            """
    params = [recID, CFG_WEBCOMMENT_ACTION_CODE['ADD_COMMENT'], threshold]
    # Anonymous users are tracked per client host, known users per uid.
    if uid >= 0:
        query += " AND id_user=%s"
        params.append(uid)
    else:
        query += " AND client_host=inet_aton(%s)"
        params.append(client_ip_address)
    return len(run_sql(query, tuple(params))) == 0
def check_user_can_review(recID, client_ip_address, uid=-1):
    """
    Check that the user has not already reviewed this record.

    NOTE(review): despite the historical reference to
    CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS, the query
    applies no time window: any previous ADD_REVIEW action for this
    record by this user/host forbids a new review.

    @param recID: record ID
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True when the user may review this record
    """
    # Wash the arguments, for consistency with check_user_can_comment()
    recID = wash_url_argument(recID, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    action_code = CFG_WEBCOMMENT_ACTION_CODE['ADD_REVIEW']
    query = """SELECT id_bibrec
               FROM cmtACTIONHISTORY
               WHERE id_bibrec=%s AND
                     action_code=%s
            """
    params = (recID, action_code)
    # Anonymous users are tracked per client host, known users per uid.
    if uid < 0:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    else:
        query += " AND id_user=%s"
        params += (uid,)
    res = run_sql(query, params)
    return len(res) == 0
def check_user_can_vote(cmt_id, client_ip_address, uid=-1):
    """
    Check whether the user has not yet voted on the given comment.

    @param cmt_id: comment id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True when no previous vote is recorded for this user/host
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    query = """SELECT id_cmtRECORDCOMMENT
               FROM cmtACTIONHISTORY
               WHERE id_cmtRECORDCOMMENT=%s"""
    params = (cmt_id,)
    # Anonymous users are tracked per client host, known users per uid.
    if uid >= 0:
        query += " AND id_user=%s"
        params += (uid,)
    else:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    return len(run_sql(query, params)) == 0
def get_comment_collection(cmt_id):
    """
    Extract the collection where the comment is written.

    @param cmt_id: comment id (cmtRECORDCOMMENT.id)
    @return: name of the primary collection of the commented record
    """
    query = "SELECT id_bibrec FROM cmtRECORDCOMMENT WHERE id=%s"
    recid = run_sql(query, (cmt_id,))
    # NOTE(review): assumes the comment exists -- recid[0][0] raises
    # IndexError for an unknown cmt_id; confirm callers validate it.
    record_primary_collection = guess_primary_collection_of_a_record(recid[0][0])
    return record_primary_collection
def get_collection_moderators(collection):
    """
    Return the list of comment moderator emails for the given
    collection, falling back to the site-wide default moderator when
    none is configured.
    """
    from invenio.access_control_engine import acc_get_authorized_emails

    moderators = list(acc_get_authorized_emails('moderatecomments',
                                                collection=collection))
    if moderators:
        return moderators
    return [CFG_WEBCOMMENT_DEFAULT_MODERATOR, ]
def perform_request_report(cmt_id, client_ip_address, uid=-1):
    """
    Report a comment/review for inappropriate content.
    Will send an email to the administrator if number of reports is a
    multiple of CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN.

    @param cmt_id: comment id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: integer 1 if successful, integer 0 if not. -2 if comment does not exist
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    if cmt_id <= 0:
        return 0
    # Bump the abuse counter first; this also tells us whether the
    # comment exists at all.
    (query_res, nb_abuse_reports) = query_record_report_this(cmt_id)
    if query_res == 0:
        return 0
    elif query_res == -2:
        return -2
    if not(check_user_can_report(cmt_id, client_ip_address, uid)):
        return 0
    # Log this report in the comment action history.
    action_date = convert_datestruct_to_datetext(time.localtime())
    action_code = CFG_WEBCOMMENT_ACTION_CODE['REPORT_ABUSE']
    query = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT, id_bibrec,
                   id_user, client_host, action_time, action_code)
               VALUES (%s, NULL, %s, inet_aton(%s), %s, %s)"""
    params = (cmt_id, uid, client_ip_address, action_date, action_code)
    run_sql(query, params)
    # Every CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN-th
    # report triggers an alert email to the collection's moderators.
    if nb_abuse_reports % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN == 0:
        (cmt_id2,
         id_bibrec,
         id_user,
         cmt_body,
         cmt_date,
         cmt_star,
         cmt_vote, cmt_nb_votes_total,
         cmt_title,
         cmt_reported,
         round_name,
         restriction) = query_get_comment(cmt_id)
        (user_nb_abuse_reports,
         user_votes,
         user_nb_votes_total) = query_get_user_reports_and_votes(int(id_user))
        (nickname, user_email, last_login) = query_get_user_contact_info(id_user)
        from_addr = '%s Alert Engine <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL)
        comment_collection = get_comment_collection(cmt_id)
        to_addrs = get_collection_moderators(comment_collection)
        subject = "A comment has been reported as inappropriate by a user"
        body = '''
The following comment has been reported a total of %(cmt_reported)s times.
Author: nickname = %(nickname)s
email = %(user_email)s
user_id = %(uid)s
This user has:
total number of reports = %(user_nb_abuse_reports)s
%(votes)s
Comment: comment_id = %(cmt_id)s
record_id = %(id_bibrec)s
date written = %(cmt_date)s
nb reports = %(cmt_reported)s
%(review_stuff)s
body =
---start body---
%(cmt_body)s
---end body---
Please go to the record page %(comment_admin_link)s to delete this message if necessary. A warning will be sent to the user in question.''' % \
            { 'cfg-report_max' : CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN,
              'nickname' : nickname,
              'user_email' : user_email,
              'uid' : id_user,
              'user_nb_abuse_reports' : user_nb_abuse_reports,
              'user_votes' : user_votes,
              # The votes/review details only make sense when reviews
              # are enabled for the site.
              'votes' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \
                        "total number of positive votes\t= %s\n\t\ttotal number of negative votes\t= %s" % \
                        (user_votes, (user_nb_votes_total - user_votes)) or "\n",
              'cmt_id' : cmt_id,
              'id_bibrec' : id_bibrec,
              'cmt_date' : cmt_date,
              'cmt_reported' : cmt_reported,
              'review_stuff' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \
                               "star score\t= %s\n\treview title\t= %s" % (cmt_star, cmt_title) or "",
              'cmt_body' : cmt_body,
              'comment_admin_link' : CFG_SITE_URL + "/"+ CFG_SITE_RECORD +"/" + str(id_bibrec) + '/comments#' + str(cmt_id),
              'user_admin_link' : "user_admin_link" #! FIXME
              }
        #FIXME to be added to email when websession module is over:
        #If you wish to ban the user, you can do so via the User Admin Panel %(user_admin_link)s.
        send_email(from_addr, to_addrs, subject, body)
    return 1
def check_user_can_report(cmt_id, client_ip_address, uid=-1):
    """
    Check that the user has not already reported this comment.

    @param cmt_id: comment id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True when no previous action on this comment is recorded
             for this user/host
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    query = """SELECT id_cmtRECORDCOMMENT
               FROM cmtACTIONHISTORY
               WHERE id_cmtRECORDCOMMENT=%s"""
    # BUGFIX: the placeholder refers to the comment id, but the
    # original code bound `uid` here, so the check matched the wrong
    # rows and could never work as intended.
    params = (cmt_id,)
    # NOTE(review): the query does not filter on action_code, so any
    # recorded action on the comment (e.g. a vote) also blocks a
    # report -- same behaviour as check_user_can_vote(); confirm
    # whether an action_code filter is wanted.
    if uid < 0:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    else:
        query += " AND id_user=%s"
        params += (uid,)
    res = run_sql(query, params)
    return (len(res) == 0)
def query_get_user_contact_info(uid):
    """
    Get the user contact information.

    @return: tuple (nickname, email, last_login), if none found return ()
    Note: for the moment, if no nickname, will return email address up to the '@'
    """
    rows = run_sql("""SELECT nickname, email,
                      DATE_FORMAT(last_login, '%%Y-%%m-%%d %%H:%%i:%%s')
                      FROM user WHERE id=%s""", (uid,))
    return rows[0] if rows else ()
def query_get_user_reports_and_votes(uid):
    """
    Sum up abuse reports and votes over all comments of a given user.

    @param uid: user id
    @return: tuple (total_nb_reports, total_nb_votes_yes, total_nb_votes_total)
             if none found return ()
    """
    rows = run_sql("""SELECT nb_votes_yes,
                             nb_votes_total,
                             nb_abuse_reports
                      FROM cmtRECORDCOMMENT
                      WHERE id_user=%s""", (uid,))
    if not rows:
        return ()
    # Accumulate each counter over all of the user's comments.
    total_votes_yes = sum(int(row[0]) for row in rows)
    total_votes = sum(int(row[1]) for row in rows)
    total_reports = sum(int(row[2]) for row in rows)
    return (total_reports, total_votes_yes, total_votes)
def query_get_comment(comID):
    """
    Fetch all fields of a single comment.

    @param comID: comment id
    @return: tuple (comID, id_bibrec, id_user, body, date_creation, star_score, nb_votes_yes, nb_votes_total, title, nb_abuse_reports, round_name, restriction)
             if none found return ()
    """
    rows = run_sql("""SELECT id,
                   id_bibrec,
                   id_user,
                   body,
                   DATE_FORMAT(date_creation, '%%Y-%%m-%%d %%H:%%i:%%s'),
                   star_score,
                   nb_votes_yes,
                   nb_votes_total,
                   title,
                   nb_abuse_reports,
                   round_name,
                   restriction
                      FROM cmtRECORDCOMMENT
                      WHERE id=%s""", (comID,))
    if not rows:
        return ()
    return rows[0]
def query_record_report_this(comID):
    """
    Increment the number of abuse reports for a comment.

    @param comID: comment id
    @return: tuple (success, new_total_nb_reports_for_this_comment) where
             success is integer 1 if success, integer 0 if not, -2 if comment does not exist
    """
    # Read the current counter; a missing row means an unknown comment.
    rows = run_sql("SELECT nb_abuse_reports FROM cmtRECORDCOMMENT WHERE id=%s",
                   (comID,))
    if not rows:
        return (-2, 0)
    # Increment and write the new value back.
    updated_nb_reports = int(rows[0][0]) + 1
    affected = run_sql("UPDATE cmtRECORDCOMMENT SET nb_abuse_reports=%s WHERE id=%s",
                       (updated_nb_reports, comID))
    return (int(affected), updated_nb_reports)
def query_record_useful_review(comID, value):
    """
    Private function.
    Adjust the number of useful votes and number of total votes for a comment.

    @param comID: comment id
    @param value: +1 or -1
    @return: integer 1 if successful, integer 0 if not
    """
    rows = run_sql("SELECT nb_votes_total, nb_votes_yes FROM cmtRECORDCOMMENT WHERE id=%s",
                   (comID,))
    if not rows:
        return 0
    # Every vote bumps the total; only positive votes bump the
    # "useful" counter.
    nb_votes_total = int(rows[0][0]) + 1
    nb_votes_yes = int(rows[0][1])
    if value >= 1:
        nb_votes_yes += 1
    affected = run_sql("UPDATE cmtRECORDCOMMENT SET nb_votes_total=%s, nb_votes_yes=%s WHERE id=%s",
                       (nb_votes_total, nb_votes_yes, comID))
    return int(affected)
def query_retrieve_comments_or_remarks(recID, display_order='od', display_since='0000-00-00 00:00:00',
                                       ranking=0, limit='all', user_info=None):
    """
    Private function
    Retrieve tuple of comments or remarks from the database.

    @param recID: record id
    @param display_order: hh = highest helpful score
                          lh = lowest helpful score
                          hs = highest star score
                          ls = lowest star score
                          od = oldest date
                          nd = newest date
    @param display_since: datetime, e.g. 0000-00-00 00:00:00
    @param ranking: boolean, enabled if reviews, disabled for comments
    @param limit: number of comments/reviews to return, as a
                  digit-string (or int), or 'all' for no limit
    @param user_info: if given, rows the user is not allowed to view
                      are filtered out
    @return: tuple of comment where comment is
             tuple (nickname, uid, date_creation, body, status, id) if ranking disabled or
             tuple (nickname, uid, date_creation, body, status, nb_votes_yes, nb_votes_total, star_score, title, id)
    Note: for the moment, if no nickname, will return email address up to '@'
    """
    display_since = calculate_start_date(display_since)

    order_dict = { 'hh' : "cmt.nb_votes_yes/(cmt.nb_votes_total+1) DESC, cmt.date_creation DESC ",
                   'lh' : "cmt.nb_votes_yes/(cmt.nb_votes_total+1) ASC, cmt.date_creation ASC ",
                   'ls' : "cmt.star_score ASC, cmt.date_creation DESC ",
                   'hs' : "cmt.star_score DESC, cmt.date_creation DESC ",
                   'nd' : "cmt.reply_order_cached_data DESC ",
                   'od' : "cmt.reply_order_cached_data ASC "
                   }

    # Ranking only done for comments and when allowed
    if ranking and recID > 0:
        try:
            display_order = order_dict[display_order]
        except KeyError:
            display_order = order_dict['od']
    else:
        # in case of recID > 0 => external record => no ranking!
        ranking = 0
        try:
            if display_order[-1] == 'd':
                display_order = order_dict[display_order]
            else:
                display_order = order_dict['od']
        except (KeyError, IndexError, TypeError):
            display_order = order_dict['od']

    query = """SELECT user.nickname,
                      cmt.id_user,
                      DATE_FORMAT(cmt.date_creation, '%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s'),
                      cmt.body,
                      cmt.status,
                      cmt.nb_abuse_reports,
                      %(ranking)s cmt.id,
                      cmt.round_name,
                      cmt.restriction,
                      %(reply_to_column)s
               FROM cmtRECORDCOMMENT cmt LEFT JOIN user ON
                                              user.id=cmt.id_user
               WHERE cmt.id_bibrec=%%s
               %(ranking_only)s
               %(display_since)s
               ORDER BY %(display_order)s
               """ % {'ranking' : ranking and ' cmt.nb_votes_yes, cmt.nb_votes_total, cmt.star_score, cmt.title, ' or '',
                      'ranking_only' : ranking and ' AND cmt.star_score>0 ' or ' AND cmt.star_score=0 ',
                      'display_since' : display_since == '0000-00-00 00:00:00' and ' ' or 'AND cmt.date_creation>=\'%s\' ' % display_since,
                      'display_order': display_order,
                      'reply_to_column': recID > 0 and 'cmt.in_reply_to_id_cmtRECORDCOMMENT' or 'cmt.in_reply_to_id_bskRECORDCOMMENT'}
    params = (recID,)
    res = run_sql(query, params)

    # BUGFIX: 'limit' arrives as a string ('all' or digits); the old
    # code decremented and sliced with the raw string, which raised
    # TypeError whenever a numeric limit was actually used. Wash it
    # into an int once (None == no limit).
    remaining = int(limit) if str(limit).isdigit() else None

    comments_list = []
    for row in res:
        if ranking:
            # when dealing with reviews, row[12] holds restriction info:
            restriction = row[12]
        else:
            # when dealing with comments, row[8] holds restriction info:
            restriction = row[8]
        if user_info and check_user_can_view_comment(user_info, None, restriction)[0] != 0:
            # User cannot view comment. Look further
            continue
        if remaining is not None:
            if remaining == 0:
                break
            remaining -= 1
        comments_list.append(row)
    if comments_list:
        return comments_list
    return ()
## def get_comment_children(comID):
## """
## Returns the list of children (i.e. direct descendants) ordered by time of addition.
## @param comID: the ID of the comment for which we want to retrieve children
## @type comID: int
## @return the list of children
## @rtype: list
## """
## res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE in_reply_to_id_cmtRECORDCOMMENT=%s", (comID,))
## return [row[0] for row in res]
## def get_comment_descendants(comID, depth=None):
## """
## Returns the list of descendants of the given comment, ordered from
## oldest to newest ("top-down"), down to depth specified as parameter.
## @param comID: the ID of the comment for which we want to retrieve descendant
## @type comID: int
## @param depth: the max depth down to which we want to retrieve
## descendants. Specify None for no limit, 1 for direct
## children only, etc.
## @return the list of ancestors
## @rtype: list(tuple(comment ID, descendants comments IDs))
## """
## if depth == 0:
## return (comID, [])
## res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE in_reply_to_id_cmtRECORDCOMMENT=%s", (comID,))
## if res:
## children_comID = [row[0] for row in res]
## children_descendants = []
## if depth:
## depth -= 1
## children_descendants = [get_comment_descendants(child_comID, depth) for child_comID in children_comID]
## return (comID, children_descendants)
## else:
## return (comID, [])
def get_comment_ancestors(comID, depth=None):
    """
    Returns the list of ancestors of the given comment, ordered from
    oldest to newest ("top-down": direct parent of comID is at last position),
    up to given depth

    @param comID: the ID of the comment for which we want to retrieve ancestors
    @type comID: int
    @param depth: the maximum of levels up from the given comment we
                  want to retrieve ancestors. None for no limit, 1 for
                  direct parent only, etc.
    @type depth: int
    @return the list of ancestors
    @rtype: list
    """
    if depth == 0:
        return []
    res = run_sql("SELECT in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT WHERE id=%s", (comID,))
    if res:
        parent_comID = res[0][0]
        # A parent id of 0 marks a top-level comment: no ancestors.
        if parent_comID == 0:
            return []
        parent_ancestors = []
        # depth=None stays None all the way up (unbounded recursion);
        # a positive depth is decremented at each level.
        if depth:
            depth -= 1
        parent_ancestors = get_comment_ancestors(parent_comID, depth)
        parent_ancestors.append(parent_comID)
        return parent_ancestors
    else:
        return []
def get_reply_order_cache_data(comid):
    """
    Pack *comid* into a 4-character big-endian string so that it is
    suitable for byte ordering in MySQL.
    """
    # Most-significant byte first, one character per byte.
    return "".join([chr((comid >> shift) % 256)
                    for shift in (24, 16, 8, 0)])
def query_add_comment_or_remark(reviews=0, recID=0, uid=-1, msg="",
                                note="", score=0, priority=0,
                                client_ip_address='', editor_type='textarea',
                                req=None, reply_to=None, attached_files=None):
    """
    Private function
    Insert a comment/review or remark into the database.

    @param reviews: 1 when inserting a review, 0 for a plain comment
    @param recID: record id
    @param uid: user id
    @param msg: comment body (utf-8 encoded string)
    @param note: comment title
    @param score: review star score
    @param priority: remark priority #!FIXME
    @param client_ip_address: client host, logged in the action history
    @param editor_type: the kind of editor used to submit the comment: 'textarea', 'ckeditor'
    @param req: request object. If provided, email notification are sent after we reply to user request.
    @param reply_to: the id of the comment we are replying to with this inserted comment.
    @param attached_files: mapping of filename -> local filepath to
                           attach to the new comment
    @return: integer >0 representing id if successful, integer 0 if not
    """
    current_date = calculate_start_date('0d')
    # Round-trip through unicode to validate the input is proper utf-8.
    #change utf-8 message into general unicode
    msg = msg.decode('utf-8')
    note = note.decode('utf-8')
    #change general unicode back to utf-8
    msg = msg.encode('utf-8')
    note = note.encode('utf-8')
    msg_original = msg
    (restriction, round_name) = get_record_status(recID)
    if attached_files is None:
        attached_files = {}
    if reply_to and CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH >= 0:
        # Check that we have not reached max depth
        comment_ancestors = get_comment_ancestors(reply_to)
        if len(comment_ancestors) >= CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH:
            if CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH == 0:
                reply_to = None
            else:
                # Reattach the reply to the ancestor sitting at the
                # deepest allowed level.
                reply_to = comment_ancestors[CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH - 1]
        # Inherit restriction and group/round of 'parent'
        comment = query_get_comment(reply_to)
        if comment:
            (round_name, restriction) = comment[10:12]
    if editor_type == 'ckeditor':
        # Here we remove the line feeds introduced by CKEditor (they
        # have no meaning for the user) and replace the HTML line
        # breaks by linefeeds, so that we are close to an input that
        # would be done without the CKEditor. That's much better if a
        # reply to a comment is made with a browser that does not
        # support CKEditor.
        msg = msg.replace('\n', '').replace('\r', '')
        # We clean the quotes that could have been introduced by
        # CKEditor when clicking the 'quote' button, as well as those
        # that we have introduced when quoting the original message.
        # We can however not use directly '>>' chars to quote, as it
        # will be washed/fixed when calling tidy_html(): double-escape
        # all &gt; first, and use &gt;&gt;
        msg = msg.replace('&gt;', '&amp;gt;')
        msg = re.sub('^\s*<blockquote', '<br/> <blockquote', msg)
        msg = re.sub('<blockquote.*?>\s*<(p|div).*?>', '&gt;&gt;', msg)
        msg = re.sub('</(p|div)>\s*</blockquote>', '', msg)
        # Then definitely remove any blockquote, whatever it is
        msg = re.sub('<blockquote.*?>', '<div>', msg)
        msg = re.sub('</blockquote>', '</div>', msg)
        # Tidy up the HTML
        msg = tidy_html(msg)
        # We remove EOL that might have been introduced when tidying
        msg = msg.replace('\n', '').replace('\r', '')
        # Now that HTML has been cleaned, unescape &gt;
        msg = msg.replace('&gt;', '>')
        msg = msg.replace('&amp;gt;', '&gt;')
        msg = re.sub('<br .*?(/>)', '\n', msg)
        msg = msg.replace('&nbsp;', ' ')
        # In case additional <p> or <div> got inserted, interpret
        # these as new lines (with a sad trick to do it only once)
        # (note that it has been deactivated, as it is messing up
        # indentation with >>)
        #msg = msg.replace('</div><', '</div>\n<')
        #msg = msg.replace('</p><', '</p>\n<')
    query = """INSERT INTO cmtRECORDCOMMENT (id_bibrec,
                                             id_user,
                                             body,
                                             date_creation,
                                             star_score,
                                             nb_votes_total,
                                             title,
                                             round_name,
                                             restriction,
                                             in_reply_to_id_cmtRECORDCOMMENT)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    params = (recID, uid, msg, current_date, score, 0, note, round_name, restriction, reply_to or 0)
    res = run_sql(query, params)
    if res:
        new_comid = int(res)
        move_attached_files_to_storage(attached_files, recID, new_comid)
        # Maintain the materialized thread-ordering key: the new
        # comment's key is its parent's key plus its own packed id
        # (see get_reply_order_cache_data).
        parent_reply_order = run_sql("""SELECT reply_order_cached_data from cmtRECORDCOMMENT where id=%s""", (reply_to,))
        if not parent_reply_order or parent_reply_order[0][0] is None:
            # This is not a reply, but a first 0-level comment
            parent_reply_order = ''
        else:
            parent_reply_order = parent_reply_order[0][0]
        run_sql("""UPDATE cmtRECORDCOMMENT SET reply_order_cached_data=%s WHERE id=%s""",
                (parent_reply_order + get_reply_order_cache_data(new_comid), new_comid))
        # Log the addition in the comment action history.
        action_code = CFG_WEBCOMMENT_ACTION_CODE[reviews and 'ADD_REVIEW' or 'ADD_COMMENT']
        action_time = convert_datestruct_to_datetext(time.localtime())
        query2 = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT,
                     id_bibrec, id_user, client_host, action_time, action_code)
                    VALUES (%s, %s, %s, inet_aton(%s), %s, %s)"""
        params2 = (res, recID, uid, client_ip_address, action_time, action_code)
        run_sql(query2, params2)

        def notify_subscribers_callback(data):
            """
            Define a callback that retrieves subscribed users, and
            notify them by email.

            @param data: contains the necessary parameters in a tuple:
                         (recid, uid, comid, msg, note, score, editor_type, reviews)
            """
            recid, uid, comid, msg, note, score, editor_type, reviews = data
            # Email this comment to 'subscribers'
            (subscribers_emails1, subscribers_emails2) = \
                get_users_subscribed_to_discussion(recid)
            email_subscribers_about_new_comment(recid, reviews=reviews,
                                                emails1=subscribers_emails1,
                                                emails2=subscribers_emails2,
                                                comID=comid, msg=msg,
                                                note=note, score=score,
                                                editor_type=editor_type, uid=uid)

        # Register our callback to notify subscribed people after
        # having replied to our current user.
        data = (recID, uid, res, msg, note, score, editor_type, reviews)
        if req:
            req.register_cleanup(notify_subscribers_callback, data)
        else:
            notify_subscribers_callback(data)
    return int(res)
def move_attached_files_to_storage(attached_files, recID, comid):
    """
    Move the files that were just attached to a new comment to their
    final location.

    @param attached_files: the mappings of desired filename to attach
                           and path where to find the original file
    @type attached_files: dict {filename, filepath}
    @param recID: the record ID to which we attach the files
    @param comid: the comment ID to which we attach the files
    """
    # Final storage directory for this comment's attachments.
    dest_dir = os.path.join(CFG_PREFIX, 'var', 'data', 'comments',
                            str(recID), str(comid))
    for filename, filepath in attached_files.iteritems():
        # os.renames creates any missing intermediate directories.
        os.renames(filepath, os.path.join(dest_dir, filename))
def get_attached_files(recid, comid):
    """
    Return a list of tuples (filename, filepath, fileurl) for the
    files attached to a given comment.

    @param recid: the recid to which the comment belong
    @param comid: the comment id for which we want to retrieve files
    """
    base_dir = os.path.join(CFG_PREFIX, 'var', 'data', 'comments',
                            str(recid), str(comid))
    if not os.path.isdir(base_dir):
        return []
    # Public download URL prefix for this comment's attachments.
    url_prefix = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/' + str(recid) + \
                 '/comments/attachments/get/' + str(comid) + '/'
    attached = []
    for filename in os.listdir(base_dir):
        attached.append((filename,
                         os.path.join(base_dir, filename),
                         url_prefix + filename))
    return attached
def subscribe_user_to_discussion(recID, uid):
    """
    Subscribe a user to a discussion, so that she receives by email
    all new comments for this record.

    @param recID: record ID corresponding to the discussion we want to
                  subscribe the user
    @param uid: user id
    @return: 1 if successful, 0 if not (e.g. already subscribed)
    """
    query = """INSERT INTO cmtSUBSCRIPTION (id_bibrec, id_user, creation_time)
               VALUES (%s, %s, %s)"""
    params = (recID, uid, convert_datestruct_to_datetext(time.localtime()))
    try:
        run_sql(query, params)
    # Narrowed from a bare `except:` so that SystemExit/KeyboardInterrupt
    # propagate; any DB failure (typically a duplicate subscription)
    # is still treated as "not subscribed".
    except Exception:
        return 0
    return 1
def unsubscribe_user_from_discussion(recID, uid):
    """
    Unsubscribe users from a discussion.

    @param recID: record ID corresponding to the discussion we want to
                  unsubscribe the user
    @param uid: user id
    @return 1 if successful, 0 if not
    """
    query = """DELETE FROM cmtSUBSCRIPTION
               WHERE id_bibrec=%s AND id_user=%s"""
    params = (recID, uid)
    try:
        res = run_sql(query, params)
    # Narrowed from a bare `except:` so that SystemExit/KeyboardInterrupt
    # propagate; any DB failure is reported as an unsuccessful
    # unsubscription.
    except Exception:
        return 0
    if res > 0:
        return 1
    return 0
def get_user_subscription_to_discussion(recID, uid):
    """
    Returns the type of subscription for the given user to this
    discussion. This does not check authorizations (for eg. if user
    was subscribed, but is suddenly no longer authorized).

    @param recID: record ID
    @param uid: user id
    @return:
              - 0 if user is not subscribed to discussion
              - 1 if user is subscribed, and is allowed to unsubscribe
              - 2 if user is subscribed, but cannot unsubscribe
    """
    user_email = get_email(uid)
    # First list: may unsubscribe; second list: automatic subscribers.
    (can_unsubscribe_emails, cannot_unsubscribe_emails) = \
        get_users_subscribed_to_discussion(recID, check_authorizations=False)
    if user_email in can_unsubscribe_emails:
        return 1
    if user_email in cannot_unsubscribe_emails:
        return 2
    return 0
def get_users_subscribed_to_discussion(recID, check_authorizations=True):
    """
    Returns the lists of users subscribed to a given discussion.

    Two lists are returned: the first one is the list of emails for
    users who can unsubscribe from the discussion, the second list
    contains the emails of users who cannot unsubscribe (for eg. author
    of the document, etc).

    Users appear in only one list. If a user has manually subscribed
    to a discussion AND is an automatic recipients for updates, it
    will only appear in the second list.

    @param recID: record ID for which we want to retrieve subscribed users
    @param check_authorizations: if True, check again if users are authorized to view comment
    @return tuple (emails1, emails2)
    """
    # Maps email -> True when the subscriber may unsubscribe, False
    # when the subscription is automatic (driven by record metadata).
    subscribers_emails = {}

    # Get users that have subscribed to this discussion
    query = """SELECT id_user FROM cmtSUBSCRIPTION WHERE id_bibrec=%s"""
    params = (recID,)
    res = run_sql(query, params)
    for row in res:
        uid = row[0]
        if check_authorizations:
            # A non-zero auth_code means "not authorized".
            user_info = collect_user_info(uid)
            (auth_code, auth_msg) = check_user_can_view_comments(user_info, recID)
        else:
            # Don't check and grant access
            auth_code = False
        if auth_code:
            # User is no longer authorized to view comments.
            # Delete subscription
            unsubscribe_user_from_discussion(recID, uid)
        else:
            email = get_email(uid)
            if '@' in email:
                subscribers_emails[email] = True

    # Get users automatically subscribed, based on the record metadata
    collections_with_auto_replies = CFG_WEBCOMMENT_EMAIL_REPLIES_TO.keys()
    for collection in collections_with_auto_replies:
        if (get_colID(collection) is not None) and \
           (recID in get_collection_reclist(collection)):
            fields = CFG_WEBCOMMENT_EMAIL_REPLIES_TO[collection]
            for field in fields:
                emails = get_fieldvalues(recID, field)
                for email in emails:
                    if not '@' in email:
                        # Is a group: add domain name
                        subscribers_emails[email + '@' + \
                                           CFG_SITE_SUPPORT_EMAIL.split('@')[1]] = False
                    else:
                        # Overwrites any manual subscription: automatic
                        # recipients end up in the second list only.
                        subscribers_emails[email] = False

    return ([email for email, can_unsubscribe_p \
             in subscribers_emails.iteritems() if can_unsubscribe_p],
            [email for email, can_unsubscribe_p \
             in subscribers_emails.iteritems() if not can_unsubscribe_p] )
def email_subscribers_about_new_comment(recID, reviews, emails1,
                                        emails2, comID, msg="",
                                        note="", score=0,
                                        editor_type='textarea',
                                        ln=CFG_SITE_LANG, uid=-1):
    """
    Notify subscribers that a new comment was posted.
    FIXME: consider recipient preference to send email in correct language.

    @param recID: record id
    @param reviews: non-zero when the new item is a review
    @param emails1: list of emails for users who can unsubscribe from discussion
    @param emails2: list of emails for users who cannot unsubscribe from discussion
    @param comID: the comment id
    @param msg: comment body
    @param note: comment title
    @param score: review star score
    @param editor_type: the kind of editor used to submit the comment: 'textarea', 'ckeditor'
    @param ln: language used for the email subject/templates
    @param uid: id of the comment's author
    @rtype: bool
    @return: True if email was sent okay, False if it was not.
    """
    _ = gettext_set_language(ln)

    if not emails1 and not emails2:
        # Nobody to notify.
        return 0

    # Get title
    titles = get_fieldvalues(recID, "245__a")
    if not titles:
        # usual title not found, try conference title:
        titles = get_fieldvalues(recID, "111__a")

    title = ''
    if titles:
        title = titles[0]
    else:
        title = _("Record %i") % recID

    # Get report number, trying the common MARC fields in turn.
    report_numbers = get_fieldvalues(recID, "037__a")
    if not report_numbers:
        report_numbers = get_fieldvalues(recID, "088__a")
        if not report_numbers:
            report_numbers = get_fieldvalues(recID, "021__a")

    # Prepare email subject and body
    if reviews:
        email_subject = _('%(report_number)s"%(title)s" has been reviewed') % \
                        {'report_number': report_numbers and ('[' + report_numbers[0] + '] ') or '',
                         'title': title}
    else:
        email_subject = _('%(report_number)s"%(title)s" has been commented') % \
                        {'report_number': report_numbers and ('[' + report_numbers[0] + '] ') or '',
                         'title': title}

    # Strip the body down to plain text suitable for email.
    washer = EmailWasher()
    msg = washer.wash(msg)
    msg = msg.replace('&gt;&gt;', '>')
    email_content = msg
    if note:
        email_content = note + email_content

    # Send emails to people who can unsubscribe
    email_header = webcomment_templates.tmpl_email_new_comment_header(recID,
                                                                      title,
                                                                      reviews,
                                                                      comID,
                                                                      report_numbers,
                                                                      can_unsubscribe=True,
                                                                      ln=ln,
                                                                      uid=uid)

    email_footer = webcomment_templates.tmpl_email_new_comment_footer(recID,
                                                                      title,
                                                                      reviews,
                                                                      comID,
                                                                      report_numbers,
                                                                      can_unsubscribe=True,
                                                                      ln=ln)
    res1 = True
    if emails1:
        res1 = send_email(fromaddr=CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL,
                          toaddr=emails1,
                          subject=email_subject,
                          content=email_content,
                          header=email_header,
                          footer=email_footer,
                          ln=ln)

    # Then send email to people who have been automatically
    # subscribed to the discussion (they cannot unsubscribe)
    email_header = webcomment_templates.tmpl_email_new_comment_header(recID,
                                                                      title,
                                                                      reviews,
                                                                      comID,
                                                                      report_numbers,
                                                                      can_unsubscribe=False,
                                                                      ln=ln,
                                                                      uid=uid)

    email_footer = webcomment_templates.tmpl_email_new_comment_footer(recID,
                                                                      title,
                                                                      reviews,
                                                                      comID,
                                                                      report_numbers,
                                                                      can_unsubscribe=False,
                                                                      ln=ln)
    res2 = True
    if emails2:
        res2 = send_email(fromaddr=CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL,
                          toaddr=emails2,
                          subject=email_subject,
                          content=email_content,
                          header=email_header,
                          footer=email_footer,
                          ln=ln)

    return res1 and res2
def get_record_status(recid):
    """
    Returns the current status of the record, i.e. current restriction to apply for newly submitted
    comments, and current commenting round.

    The restriction to apply can be found in the record metadata, in
    field(s) defined by config CFG_WEBCOMMENT_RESTRICTION_DATAFIELD. The restriction is empty string ""
    in cases where the restriction has not explicitely been set, even
    if the record itself is restricted.

    @param recid: the record id
    @type recid: int
    @return tuple(restriction, round_name), where 'restriction' is empty string when no restriction applies
    @rtype (string, int)
    """
    collections_with_rounds = CFG_WEBCOMMENT_ROUND_DATAFIELD.keys()
    commenting_round = ""
    for collection in collections_with_rounds:
        # Find the first collection defines rounds field for this
        # record
        if get_colID(collection) is not None and \
           (recid in get_collection_reclist(collection)):
            commenting_rounds = get_fieldvalues(recid, CFG_WEBCOMMENT_ROUND_DATAFIELD.get(collection, ""))
            if commenting_rounds:
                commenting_round = commenting_rounds[0]
            # Stop at the first collection containing the record.
            break

    collections_with_restrictions = CFG_WEBCOMMENT_RESTRICTION_DATAFIELD.keys()
    restriction = ""
    for collection in collections_with_restrictions:
        # Find the first collection that defines restriction field for
        # this record
        if get_colID(collection) is not None and \
           recid in get_collection_reclist(collection):
            restrictions = get_fieldvalues(recid, CFG_WEBCOMMENT_RESTRICTION_DATAFIELD.get(collection, ""))
            if restrictions:
                restriction = restrictions[0]
            # Stop at the first collection containing the record.
            break

    return (restriction, commenting_round)
def calculate_start_date(display_since):
    """
    Private helper: convert a 'display_since' filter into a MySQL datetime
    string computed from the local time.

    @param display_since: 'all' = no filtering,
                          nd = n days ago,
                          nw = n weeks ago,
                          nm = n months ago,
                          ny = n years ago,
                          where n is a number
    @return: the wanted datetime formatted as "%Y-%m-%d %H:%M:%S".
             Returns datetext_default (the MySQL zero date, defined in
             miscutils/lib/dateutils) when 'all', None or a malformed
             argument is given, or when the optional 'dateutil' library
             is missing for the month computation (an exception is then
             registered).
    """
    now = datetime.today()
    try:
        amount = int(display_since[:-1])
    except:
        # None, 'all' and any malformed value all fail the int() conversion
        return datetext_default
    if display_since in [None, 'all']:
        return datetext_default
    unit = str(display_since[-1])
    if unit not in ('d', 'w', 'm', 'y'):
        return datetext_default
    if unit == 'y':
        # Only accept offsets that keep the resulting year positive
        if amount > now.year - 1 or amount < 1:
            return datetext_default
        start = now.replace(year=now.year - amount)
    elif unit == 'm':
        try:
            from dateutil.relativedelta import relativedelta
        except ImportError:
            # The dateutil library is only recommended: if not
            # available, then send warning about this.
            register_exception(alert_admin=True)
            return datetext_default
        # Compute the target calendar date, then keep today's time of day
        target = datetime.now().date() - relativedelta(months=amount)
        start = now.replace(year=target.year, month=target.month, day=target.day)
    elif unit == 'w':
        start = now - timedelta(weeks=amount)
    else:  # unit == 'd'
        start = now - timedelta(days=amount)
    return start.strftime("%Y-%m-%d %H:%M:%S")
def get_first_comments_or_remarks(recID=-1,
                                  ln=CFG_SITE_LANG,
                                  nb_comments='all',
                                  nb_reviews='all',
                                  voted=-1,
                                  reported=-1,
                                  user_info=None,
                                  show_reviews=False):
    """
    Gets nb number comments/reviews or remarks.
    In the case of comments, will get both comments and reviews.
    Comments and remarks sorted by most recent date, reviews sorted by highest helpful score.
    @param recID: record id
    @param ln: language
    @param nb_comments: number of comments or remarks to get ('all' or an int)
    @param nb_reviews: number of reviews or remarks to get ('all' or an int)
    @param voted: 1 if user has voted for a remark
    @param reported: 1 if user has reported a comment or review
    @param user_info: standard user_info object; forwarded to the query to filter restricted comments
    @param show_reviews: if True, also render the reviews HTML
    @return: if comment, tuple (comments, reviews) both being html of first nb comments/reviews
             if remark, tuple (remarks, None)
    """
    _ = gettext_set_language(ln)
    warnings = []
    voted = wash_url_argument(voted, 'int')
    reported = wash_url_argument(reported, 'int')
    ## check recID argument
    if type(recID) is not int:
        return ()
    if recID >= 1: #comment or review. NB: suppressed reference to basket (handled in webbasket)
        if CFG_WEBCOMMENT_ALLOW_REVIEWS:
            # Reviews are ordered by highest helpfulness score ("hh")
            res_reviews = query_retrieve_comments_or_remarks(recID=recID, display_order="hh", ranking=1,
                                                             limit=nb_comments, user_info=user_info)
            nb_res_reviews = len(res_reviews)
            ## check nb argument
            if type(nb_reviews) is int and nb_reviews < len(res_reviews):
                first_res_reviews = res_reviews[:nb_reviews]
            else:
                first_res_reviews = res_reviews
        if CFG_WEBCOMMENT_ALLOW_COMMENTS:
            # Comments are ordered by oldest date first ("od").
            # NOTE(review): 'limit' receives nb_reviews here while the reviews
            # query above receives nb_comments -- looks swapped; confirm intent.
            res_comments = query_retrieve_comments_or_remarks(recID=recID, display_order="od", ranking=0,
                                                              limit=nb_reviews, user_info=user_info)
            nb_res_comments = len(res_comments)
            ## check nb argument
            if type(nb_comments) is int and nb_comments < len(res_comments):
                first_res_comments = res_comments[:nb_comments]
            else:
                first_res_comments = res_comments
    else: #error
        try:
            raise InvenioWebCommentError(_('%s is an invalid record ID') % recID)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message, ln)
            return body
        #errors.append(('ERR_WEBCOMMENT_RECID_INVALID', recID)) #!FIXME dont return error anywhere since search page
    # comment
    if recID >= 1:
        comments = reviews = ""
        if reported > 0:
            # Feedback (abuse report) was successfully recorded
            try:
                raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning')
                warnings.append((exc.message, 'green'))
            #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED_GREEN_TEXT',))
        elif reported == 0:
            try:
                raise InvenioWebCommentWarning(_('Your feedback could not be recorded, please try again.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning')
                warnings.append((exc.message, ''))
            #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_NOT_RECORDED_RED_TEXT',))
        if CFG_WEBCOMMENT_ALLOW_COMMENTS: # normal comments
            # first_res_comments/nb_res_comments were bound in the block above
            # guarded by this same flag, so they are defined here
            grouped_comments = group_comments_by_round(first_res_comments, ranking=0)
            comments = webcomment_templates.tmpl_get_first_comments_without_ranking(recID, ln, grouped_comments, nb_res_comments, warnings)
        if show_reviews:
            if CFG_WEBCOMMENT_ALLOW_REVIEWS: # ranked comments
                #calculate average score
                avg_score = calculate_avg_score(res_reviews)
                if voted > 0:
                    try:
                        raise InvenioWebCommentWarning(_('Your feedback has been recorded, many thanks.'))
                    except InvenioWebCommentWarning, exc:
                        register_exception(stream='warning')
                        warnings.append((exc.message, 'green'))
                    #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED_GREEN_TEXT',))
                elif voted == 0:
                    try:
                        raise InvenioWebCommentWarning(_('Your feedback could not be recorded, please try again.'))
                    except InvenioWebCommentWarning, exc:
                        register_exception(stream='warning')
                        warnings.append((exc.message, ''))
                    #warnings.append(('WRN_WEBCOMMENT_FEEDBACK_NOT_RECORDED_RED_TEXT',))
                grouped_reviews = group_comments_by_round(first_res_reviews, ranking=0)
                reviews = webcomment_templates.tmpl_get_first_comments_with_ranking(recID, ln, grouped_reviews, nb_res_reviews, avg_score, warnings)
        return (comments, reviews)
    # remark
    else:
        # NOTE(review): unreachable -- recID < 1 already returned in the
        # error branch above; kept for the historical "remark" code path
        return(webcomment_templates.tmpl_get_first_remarks(first_res_comments, ln, nb_res_comments), None)
def group_comments_by_round(comments, ranking=0):
    """
    Group the given comments by the commenting round they belong to.

    @param comments: sequence of comment rows as returned by
        query_retrieve_comments_or_remarks (round name at index 11 for
        reviews, at index 7 for plain comments)
    @param ranking: 1 when the rows are reviews, 0 for plain comments
    @return: list of (round_name, comments_of_that_round) tuples, ordered
        by first appearance of each round in the input
    """
    comment_rounds = {}
    ordered_comment_round_names = []
    for comment in comments:
        comment_round_name = ranking and comment[11] or comment[7]
        # dict.has_key() is deprecated (and gone in Python 3); use 'in'
        if comment_round_name not in comment_rounds:
            comment_rounds[comment_round_name] = []
            ordered_comment_round_names.append(comment_round_name)
        comment_rounds[comment_round_name].append(comment)
    return [(comment_round_name, comment_rounds[comment_round_name]) \
            for comment_round_name in ordered_comment_round_names]
def calculate_avg_score(res):
    """
    Private helper: compute the average star score of the reviews in res,
    rounded to the nearest half star.

    @param res: tuple of tuples as returned from
        query_retrieve_comments_or_remarks (star score at index 6)
    @return: a float, the average score rounded to the closest 0.5 and
        capped at 5.0; 0.0 when there is no scored review
    """
    score_index = 6
    total = 0.0
    count = 0
    for review in res:
        score = review[score_index]
        # Only rows with a positive star score are actual reviews
        if score > 0:
            total += score
            count += 1
    if not count:
        return 0.0
    mean = total / count
    base = math.floor(mean)
    fraction = mean - base
    # Snap to the nearest half star
    if fraction < 0.25:
        rounded = base
    elif fraction > 0.75:
        rounded = base + 1
    else:
        rounded = base + 0.5
    # Never exceed the 5-star maximum
    if rounded > 5:
        rounded = 5.0
    return rounded
def perform_request_add_comment_or_remark(recID=0,
                                          uid=-1,
                                          action='DISPLAY',
                                          ln=CFG_SITE_LANG,
                                          msg=None,
                                          score=None,
                                          note=None,
                                          priority=None,
                                          reviews=0,
                                          comID=0,
                                          client_ip_address=None,
                                          editor_type='textarea',
                                          can_attach_files=False,
                                          subscribe=False,
                                          req=None,
                                          attached_files=None,
                                          warnings=None):
    """
    Add a comment/review or remark
    @param recID: record id
    @param uid: user id
    @param action: 'DISPLAY' to display add form
                   'SUBMIT' to submit comment once form is filled
                   'REPLY' to reply to an existing comment
    @param ln: language
    @param msg: the body of the comment/review or remark
    @param score: star score of the review
    @param note: title of the review
    @param priority: priority of remark (int)
    @param reviews: boolean, if enabled will add a review, if disabled will add a comment
    @param comID: if replying, this is the comment id of the comment we are replying to
    @param client_ip_address: IP of the submitter, used by the anti-flood checks
    @param editor_type: the kind of editor/input used for the comment: 'textarea', 'ckeditor'
    @param can_attach_files: if user can attach files to comments or not
    @param subscribe: if True, subscribe user to receive new comments by email
    @param req: request object. Used to register callback to send email notification
    @param attached_files: newly attached files to this comment, mapping filename to filepath
    @type attached_files: dict
    @param warnings: list of warning tuples (warning_text, warning_color) that should be considered
    @return:
             - html add form if action is display or reply
             - html successful added form if action is submit
    """
    _ = gettext_set_language(ln)
    if warnings is None:
        warnings = []
    actions = ['DISPLAY', 'REPLY', 'SUBMIT']
    # NOTE(review): duplicate of the gettext_set_language call above; harmless
    _ = gettext_set_language(ln)
    ## check arguments
    check_recID_is_in_range(recID, warnings, ln)
    if uid <= 0:
        try:
            raise InvenioWebCommentError(_('%s is an invalid user ID.') % uid)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message, ln)
            return body
        #errors.append(('ERR_WEBCOMMENT_UID_INVALID', uid))
        # NOTE(review): unreachable -- the except branch above always returns
        return ''
    if attached_files is None:
        attached_files = {}
    # Resolve the submitter's nickname (first field of the contact info row)
    user_contact_info = query_get_user_contact_info(uid)
    nickname = ''
    if user_contact_info:
        if user_contact_info[0]:
            nickname = user_contact_info[0]
    # show the form
    if action == 'DISPLAY':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        elif not reviews and CFG_WEBCOMMENT_ALLOW_COMMENTS:
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files)
        else:
            try:
                raise InvenioWebCommentError(_('Comments on records have been disallowed by the administrator.'))
            except InvenioWebCommentError, exc:
                register_exception(req=req)
                body = webcomment_templates.tmpl_error(exc.message, ln)
                return body
            #errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))
    elif action == 'REPLY':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            # Replies are only meaningful for plain comments, not reviews
            try:
                raise InvenioWebCommentError(_('Cannot reply to a review.'))
            except InvenioWebCommentError, exc:
                register_exception(req=req)
                body = webcomment_templates.tmpl_error(exc.message, ln)
                return body
            #errors.append(('ERR_WEBCOMMENT_REPLY_REVIEW',))
            # NOTE(review): unreachable -- the except branch above always returns
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        elif not reviews and CFG_WEBCOMMENT_ALLOW_COMMENTS:
            textual_msg = msg
            if comID > 0:
                # Quote the replied-to comment in the new message body
                comment = query_get_comment(comID)
                if comment:
                    user_info = get_user_info(comment[2])
                    if user_info:
                        date_creation = convert_datetext_to_dategui(str(comment[4]))
                        # Build two msg: one mostly textual, the other one with HTML markup, for the CkEditor.
                        msg = _("%(x_name)s wrote on %(x_date)s:")% {'x_name': user_info[2], 'x_date': date_creation}
                        textual_msg = msg
                        # 1 For CkEditor input
                        msg += '\n\n'
                        msg += comment[3]
                        msg = email_quote_txt(text=msg)
                        # Now that we have a text-quoted version, transform into
                        # something that CkEditor likes, using <blockquote> that
                        # do still enable users to insert comments inline
                        msg = email_quoted_txt2html(text=msg,
                                                    indent_html=('<blockquote><div>', '&nbsp;&nbsp;</div></blockquote>'),
                                                    linebreak_html="&nbsp;<br/>",
                                                    indent_block=False)
                        # Add some space for users to easily add text
                        # around the quoted message
                        msg = '<br/>' + msg + '<br/>'
                        # Due to how things are done, we need to
                        # escape the whole msg again for the editor
                        msg = cgi.escape(msg)
                        # 2 For textarea input
                        textual_msg += "\n\n"
                        textual_msg += comment[3]
                        textual_msg = email_quote_txt(text=textual_msg)
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, textual_msg, can_attach_files=can_attach_files, reply_to=comID)
        else:
            try:
                raise InvenioWebCommentError(_('Comments on records have been disallowed by the administrator.'))
            except InvenioWebCommentError, exc:
                register_exception(req=req)
                body = webcomment_templates.tmpl_error(exc.message, ln)
                return body
            #errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))
    # check before submitting form
    elif action == 'SUBMIT':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            # Reviews additionally require a title and a star score
            if note.strip() in ["", "None"] and not CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
                try:
                    raise InvenioWebCommentWarning(_('You must enter a title.'))
                except InvenioWebCommentWarning, exc:
                    register_exception(stream='warning', req=req)
                    warnings.append((exc.message, ''))
                #warnings.append(('WRN_WEBCOMMENT_ADD_NO_TITLE',))
            if score == 0 or score > 5:
                try:
                    raise InvenioWebCommentWarning(_('You must choose a score.'))
                except InvenioWebCommentWarning, exc:
                    register_exception(stream='warning', req=req)
                    warnings.append((exc.message, ''))
                #warnings.append(("WRN_WEBCOMMENT_ADD_NO_SCORE",))
        if msg.strip() in ["", "None"] and not CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
            try:
                raise InvenioWebCommentWarning(_('You must enter a text.'))
            except InvenioWebCommentWarning, exc:
                register_exception(stream='warning', req=req)
                warnings.append((exc.message, ''))
            #warnings.append(('WRN_WEBCOMMENT_ADD_NO_BODY',))
        # if no warnings, submit
        if len(warnings) == 0:
            if reviews:
                if check_user_can_review(recID, client_ip_address, uid):
                    success = query_add_comment_or_remark(reviews, recID=recID, uid=uid, msg=msg,
                                                          note=note, score=score, priority=0,
                                                          client_ip_address=client_ip_address,
                                                          editor_type=editor_type,
                                                          req=req,
                                                          reply_to=comID)
                else:
                    # One review max per user and record
                    try:
                        raise InvenioWebCommentWarning(_('You already wrote a review for this record.'))
                    except InvenioWebCommentWarning, exc:
                        register_exception(stream='warning', req=req)
                        warnings.append((exc.message, ''))
                    #warnings.append('WRN_WEBCOMMENT_CANNOT_REVIEW_TWICE')
                    success = 1
            else:
                if check_user_can_comment(recID, client_ip_address, uid):
                    success = query_add_comment_or_remark(reviews, recID=recID, uid=uid, msg=msg,
                                                          note=note, score=score, priority=0,
                                                          client_ip_address=client_ip_address,
                                                          editor_type=editor_type,
                                                          req=req,
                                                          reply_to=comID, attached_files=attached_files)
                    if success > 0 and subscribe:
                        subscribe_user_to_discussion(recID, uid)
                else:
                    # Anti-flood: comments cannot be posted in quick succession
                    try:
                        raise InvenioWebCommentWarning(_('You already posted a comment short ago. Please retry later.'))
                    except InvenioWebCommentWarning, exc:
                        register_exception(stream='warning', req=req)
                        warnings.append((exc.message, ''))
                    #warnings.append('WRN_WEBCOMMENT_TIMELIMIT')
                    success = 1
            if success > 0:
                if CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL > 0:
                    # 'success' is the new comment's id when insertion succeeded
                    notify_admin_of_new_comment(comID=success)
                return webcomment_templates.tmpl_add_comment_successful(recID, ln, reviews, warnings, success)
            else:
                try:
                    raise InvenioWebCommentError(_('Failed to insert your comment to the database. Please try again.'))
                except InvenioWebCommentError, exc:
                    register_exception(req=req)
                    body = webcomment_templates.tmpl_error(exc.message, ln)
                    return body
                #errors.append(('ERR_WEBCOMMENT_DB_INSERT_ERROR'))
        # if are warnings or if inserting comment failed, show user where warnings are
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        else:
            return webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files)
    # unknown action send to display
    else:
        try:
            raise InvenioWebCommentWarning(_('Unknown action --> showing you the default add comment form.'))
        except InvenioWebCommentWarning, exc:
            register_exception(stream='warning', req=req)
            warnings.append((exc.message, ''))
        #warnings.append(('WRN_WEBCOMMENT_ADD_UNKNOWN_ACTION',))
        # NOTE(review): unlike every other call site in this function, these
        # template calls omit the 'nickname' argument -- confirm against the
        # template signatures; likely shifts all following arguments
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, ln, msg, score, note, warnings, can_attach_files=can_attach_files)
        else:
            return webcomment_templates.tmpl_add_comment_form(recID, uid, ln, msg, warnings, can_attach_files=can_attach_files)
    # NOTE(review): unreachable -- every branch of the if/elif/else returns
    return ''
def notify_admin_of_new_comment(comID):
    """
    Sends an email to the admin (the moderators of the comment's collection)
    with details regarding comment with ID = comID.
    @param comID: id of the freshly inserted comment
    """
    comment = query_get_comment(comID)
    if len(comment) > 0:
        # Unpack the full comment row as returned by query_get_comment
        (comID2,
         id_bibrec,
         id_user,
         body,
         date_creation,
         star_score, nb_votes_yes, nb_votes_total,
         title,
         nb_abuse_reports, round_name, restriction) = comment
    else:
        # Comment does not exist: nothing to notify about
        return
    user_info = query_get_user_contact_info(id_user)
    if len(user_info) > 0:
        (nickname, email, last_login) = user_info
        if not len(nickname) > 0:
            # No nickname set: fall back to the local part of the email address
            nickname = email.split('@')[0]
    else:
        nickname = email = last_login = "ERROR: Could not retrieve"
    # Review-only details, appended to the email when star_score > 0
    review_stuff = '''
    Star score = %s
    Title = %s''' % (star_score, title)
    washer = EmailWasher()
    try:
        # Sanitize the comment's HTML for inclusion in the email
        body = washer.wash(body)
    except:
        # Body contained invalid markup: fall back to plain escaping
        body = cgi.escape(body)
    record_info = webcomment_templates.tmpl_email_new_comment_admin(id_bibrec)
    out = '''
The following %(comment_or_review)s has just been posted (%(date)s).

AUTHOR:
    Nickname = %(nickname)s
    Email = %(email)s
    User ID = %(uid)s

RECORD CONCERNED:
    Record ID = %(recID)s
    URL = <%(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/%(comments_or_reviews)s/>
%(record_details)s

%(comment_or_review_caps)s:
    %(comment_or_review)s ID = %(comID)s %(review_stuff)s
    Body =
<--------------->
%(body)s
<--------------->

ADMIN OPTIONS:
To moderate the %(comment_or_review)s go to %(siteurl)s/%(CFG_SITE_RECORD)s/%(recID)s/%(comments_or_reviews)s/display?%(arguments)s
    ''' % \
        {   'comment_or_review'     : star_score > 0 and 'review' or 'comment',
            'comment_or_review_caps': star_score > 0 and 'REVIEW' or 'COMMENT',
            'comments_or_reviews'   : star_score > 0 and 'reviews' or 'comments',
            'date'                  : date_creation,
            'nickname'              : nickname,
            'email'                 : email,
            'uid'                   : id_user,
            'recID'                 : id_bibrec,
            'record_details'        : record_info,
            'comID'                 : comID2,
            'review_stuff'          : star_score > 0 and review_stuff or "",
            'body'                  : body.replace('<br />','\n'),
            'siteurl'               : CFG_SITE_URL,
            'CFG_SITE_RECORD'       : CFG_SITE_RECORD,
            'arguments'             : 'ln=en&do=od#%s' % comID
        }
    from_addr = '%s WebComment <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL)
    comment_collection = get_comment_collection(comID)
    # Notify the moderators of the collection the comment belongs to
    to_addrs = get_collection_moderators(comment_collection)
    rec_collection = guess_primary_collection_of_a_record(id_bibrec)
    # Report numbers (MARC 037/088 $a) help identify the record in the subject
    report_nums = get_fieldvalues(id_bibrec, "037__a")
    report_nums += get_fieldvalues(id_bibrec, "088__a")
    report_nums = ', '.join(report_nums)
    subject = "A new comment/review has just been posted [%s|%s]" % (rec_collection, report_nums)
    send_email(from_addr, to_addrs, subject, out)
def check_recID_is_in_range(recID, warnings=[], ln=CFG_SITE_LANG):
"""
Check that recID is >= 0
@param recID: record id
@param warnings: list of warning tuples (warning_text, warning_color)
@return: tuple (boolean, html) where boolean (1=true, 0=false)
and html is the body of the page to display if there was a problem
"""
_ = gettext_set_language(ln)
try:
recID = int(recID)
except:
pass
if type(recID) is int:
if recID > 0:
from invenio.search_engine import record_exists
success = record_exists(recID)
if success == 1:
return (1,"")
else:
try:
raise InvenioWebCommentWarning(_('Record ID %s does not exist in the database.') % recID)
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(('ERR_WEBCOMMENT_RECID_INEXISTANT', recID))
return (0, webcomment_templates.tmpl_record_not_found(status='inexistant', recID=recID, ln=ln))
elif recID == 0:
try:
raise InvenioWebCommentWarning(_('No record ID was given.'))
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(('ERR_WEBCOMMENT_RECID_MISSING',))
return (0, webcomment_templates.tmpl_record_not_found(status='missing', recID=recID, ln=ln))
else:
try:
raise InvenioWebCommentWarning(_('Record ID %s is an invalid ID.') % recID)
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(('ERR_WEBCOMMENT_RECID_INVALID', recID))
return (0, webcomment_templates.tmpl_record_not_found(status='invalid', recID=recID, ln=ln))
else:
try:
raise InvenioWebCommentWarning(_('Record ID %s is not a number.') % recID)
except InvenioWebCommentWarning, exc:
register_exception(stream='warning')
warnings.append((exc.message, ''))
#warnings.append(('ERR_WEBCOMMENT_RECID_NAN', recID))
return (0, webcomment_templates.tmpl_record_not_found(status='nan', recID=recID, ln=ln))
def check_int_arg_is_in_range(value, name, gte_value, lte_value=None):
    """
    Check that the variable named 'name' satisfies value >= gte_value and,
    when lte_value is given, value <= lte_value.

    @param value: variable value
    @param name: variable name (currently unused in the body; kept for
        interface compatibility)
    @param gte_value: greater than or equal to value
    @param lte_value: less than or equal to value
    @return: 1 when the value is an int within range; on failure the
        rendered error page body (html string) is returned instead
        (see NOTE comments below about the dead `return 0` statements)
    """
    if type(value) is not int:
        try:
            raise InvenioWebCommentError('%s is not a number.' % value)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message)
            return body
        #errors.append(('ERR_WEBCOMMENT_ARGUMENT_NAN', value))
        # NOTE(review): unreachable -- the except branch above always returns
        return 0
    if value < gte_value:
        try:
            raise InvenioWebCommentError('%s invalid argument.' % value)
        except InvenioWebCommentError, exc:
            register_exception()
            body = webcomment_templates.tmpl_error(exc.message)
            return body
        #errors.append(('ERR_WEBCOMMENT_ARGUMENT_INVALID', value))
        # NOTE(review): unreachable -- the except branch above always returns
        return 0
    if lte_value:
        if value > lte_value:
            try:
                raise InvenioWebCommentError('%s invalid argument.' % value)
            except InvenioWebCommentError, exc:
                register_exception()
                body = webcomment_templates.tmpl_error(exc.message)
                return body
            #errors.append(('ERR_WEBCOMMENT_ARGUMENT_INVALID', value))
            # NOTE(review): unreachable -- the except branch above always returns
            return 0
    return 1
def get_mini_reviews(recid, ln=CFG_SITE_LANG):
    """
    Return the web controls used to add reviews to a record from the
    detailed record page mini-panel.

    @param recid: the id of the displayed record
    @param ln: the user's language
    """
    # Short reviews may be submitted straight from the panel; otherwise the
    # panel only links to the full review form.
    action = CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS and 'SUBMIT' or 'DISPLAY'
    reviews = query_retrieve_comments_or_remarks(recid, ranking=1)
    return webcomment_templates.tmpl_mini_review(recid, ln, action=action,
                                                 avg_score=calculate_avg_score(reviews),
                                                 nb_comments_total=len(reviews))
def check_user_can_view_comments(user_info, recid):
    """Check if the user is authorized to view comments for given
    recid.
    Returns the same type as acc_authorize_action
    """
    # The record itself must be viewable before its comments are
    (auth_code, auth_msg) = check_user_can_view_record(user_info, recid)
    if auth_code:
        return (auth_code, auth_msg)
    ## Then check the 'viewcomment' action, whose authorization may be
    ## defined per collection
    primary_collection = guess_primary_collection_of_a_record(recid)
    return acc_authorize_action(user_info, 'viewcomment',
                                authorized_if_no_roles=True,
                                collection=primary_collection)
def check_user_can_view_comment(user_info, comid, restriction=None):
    """Check if the user is authorized to view a particular comment,
    given the comment restriction. Note that this function does not
    check if the record itself is restricted to the user, which would
    mean that the user should not see the comment.

    You can omit 'comid' if you already know the 'restriction'

    @param user_info: the user info object
    @param comid: the comment id of that we want to check
    @param restriction: the restriction applied to given comment (if known. Otherwise retrieved automatically)
    @return: the same type as acc_authorize_action
    """
    if restriction is None:
        # Look up the comment's restriction (column 11 of the comment row)
        row = query_get_comment(comid)
        if not row:
            return (1, 'Comment %i does not exist' % comid)
        restriction = row[11]
    if restriction == "":
        # Unrestricted comment: anyone may view it
        return (0, '')
    return acc_authorize_action(user_info, 'viewrestrcomment', status=restriction)
def check_user_can_send_comments(user_info, recid):
    """Check if the user is authorized to comment the given
    recid. This function does not check that user can view the record
    or view the comments
    Returns the same type as acc_authorize_action
    """
    ## Authorization for 'sendcomment' may be defined per collection,
    ## so resolve the record's primary collection first
    primary_collection = guess_primary_collection_of_a_record(recid)
    return acc_authorize_action(user_info, 'sendcomment',
                                authorized_if_no_roles=True,
                                collection=primary_collection)
def check_comment_belongs_to_record(comid, recid):
    """
    Return True if the comment is indeed part of given record (even if comment
    or/and record have been "deleted"). Else return False.

    @param comid: the id of the comment to check membership
    @param recid: the recid of the record we want to check if comment belongs to
    """
    rows = run_sql("""SELECT id_bibrec from cmtRECORDCOMMENT WHERE id=%s""",
                   (comid,))
    # Empty result means the comment does not exist at all
    return bool(rows and rows[0][0] == recid)
def check_user_can_attach_file_to_comments(user_info, recid):
    """Check if the user is authorized to attach a file to comments
    for given recid. This function does not check that user can view
    the comments or send comments.
    Returns the same type as acc_authorize_action
    """
    ## The 'attachcommentfile' authorization may be defined per collection
    primary_collection = guess_primary_collection_of_a_record(recid)
    return acc_authorize_action(user_info, 'attachcommentfile',
                                authorized_if_no_roles=False,
                                collection=primary_collection)
def toggle_comment_visibility(uid, comid, collapse, recid):
    """
    Toggle the visibility of the given comment (collapse) for the
    given user. Return the new visibility
    @param uid: the user id for which the change applies
    @param comid: the comment id to close/open
    @param collapse: if the comment is to be closed (1) or opened (0)
    @param recid: the record id to which the comment belongs
    @return: if the comment is visible or not after the update
    """
    # We rely on the client to tell if comment should be collapsed or
    # developed, to ensure consistency between our internal state and
    # client state. Even if not strictly necessary, we store the
    # record ID for quicker retrieval of the collapsed comments of a
    # given discussion page. To prevent unnecessary population of the
    # table, only one distinct tuple (record ID, comment ID, user ID)
    # can be inserted (due to table definition). For the same purpose
    # we also check that comment to collapse exists, and corresponds
    # to an existing record: we cannot rely on the recid found as part
    # of the URL, as no former check is done. This rule is not applied
    # when deleting an entry, as in the worst case no line would be
    # removed. For optimized retrieval of row to delete, the id_bibrec
    # column is used, though not strictly necessary.
    if collapse:
        # Verify the comment exists before recording the collapsed state
        query = """SELECT id_bibrec from cmtRECORDCOMMENT WHERE id=%s"""
        params = (comid,)
        res = run_sql(query, params)
        if res:
            # INSERT ... IGNORE enforces at most one row per
            # (record, comment, user) tuple
            query = """INSERT DELAYED IGNORE INTO cmtCOLLAPSED (id_bibrec, id_cmtRECORDCOMMENT, id_user)
                       VALUES (%s, %s, %s)"""
            params = (res[0][0], comid, uid)
            run_sql(query, params)
            return True
    else:
        query = """DELETE FROM cmtCOLLAPSED WHERE
                       id_cmtRECORDCOMMENT=%s and
                       id_user=%s and
                       id_bibrec=%s"""
        params = (comid, uid, recid)
        run_sql(query, params)
        return False
def get_user_collapsed_comments_for_record(uid, recid):
    """
    Get the comments collapsed for given user on given recid page
    """
    # Collapsed state is per-user, so it is stored in its own table
    # (cmtCOLLAPSED) rather than as an attribute of cmtRECORDCOMMENT;
    # this query gives efficient retrieval for a whole discussion page.
    rows = run_sql("""SELECT id_cmtRECORDCOMMENT from cmtCOLLAPSED WHERE id_user=%s and id_bibrec=%s""",
                   (uid, recid))
    return [row[0] for row in rows]
def is_comment_deleted(comid):
    """
    Return True if the comment is deleted. Else False.
    @param comid: ID of comment to check
    """
    rows = run_sql("SELECT status from cmtRECORDCOMMENT WHERE id=%s", (comid,))
    # Any status other than 'ok' counts as deleted; missing comment -> False
    return bool(rows) and rows[0][0] != 'ok'
def perform_display_your_comments(user_info,
                                  page_number=1,
                                  selected_order_by_option="lcf",
                                  selected_display_number_option="all",
                                  selected_display_format_option="rc",
                                  ln=CFG_SITE_LANG):
    """
    Display all comments submitted by the user.
    @TODO: support reviews too
    @param user_info: standard user info object.
    @param page_number: page on which the user is.
    @type page_number: integer
    @param selected_order_by_option: selected ordering option. Can be one of:
        - ocf: Oldest comment first
        - lcf: Latest comment first
        - grof: Group by record, oldest commented first
        - grlf: Group by record, latest commented first
    @type selected_order_by_option: string
    @param selected_display_number_option: number of results to show per page. Can be a string-digit or 'all'.
    @type selected_display_number_option: string
    @param selected_display_format_option: how to show records. Can be one of:
        - rc: Records and comments
        - ro: Records only
        - co: Comments only
    @type selected_display_format_option: string
    @param ln: language
    @type ln: string
    """
    query_params = ""
    nb_total_pages = 0
    # Count matching comments ('star_score = 0' excludes reviews). The count
    # drives the pagination, so it must match the grouping used below.
    if selected_display_format_option in ('rc', 'co'):
        nb_total_results = run_sql("SELECT count(id) from cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0", \
                                   (user_info['uid'], ))[0][0]
    else:
        if selected_order_by_option in ('grlf', 'grof'):
            # Grouped display: one entry per distinct record
            nb_total_results = run_sql("SELECT count(distinct(id_bibrec)) from cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0", \
                                       (user_info['uid'], ))[0][0]
        else:
            nb_total_results = run_sql("SELECT count(id_bibrec) from cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0", \
                                       (user_info['uid'], ))[0][0]
    if page_number < 1:
        page_number = 1
    if selected_display_number_option != 'all' and \
       not selected_display_number_option.isdigit():
        # must be some garbage
        selected_display_number_option = 'all'
    query = ''
    if selected_order_by_option == "lcf":
        query_params += " ORDER BY date_creation DESC"
    elif selected_order_by_option == "ocf":
        query_params += " ORDER BY date_creation ASC"
    elif selected_order_by_option == "grlf":
        # Group by record; records ordered by their most recent comment
        query = "SELECT cmt.id_bibrec, cmt.id, cmt.date_creation, cmt.body, cmt.status, cmt.in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT as cmt left join (SELECT max(date_creation) as maxdatecreation, id_bibrec FROM cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0 GROUP BY id_bibrec) as grp on cmt.id_bibrec = grp.id_bibrec WHERE id_user=%s AND star_score = 0 ORDER BY grp.maxdatecreation DESC, cmt.date_creation DESC"
    elif selected_order_by_option == "grof":
        # Group by record; records ordered by their oldest comment
        query = "SELECT cmt.id_bibrec, cmt.id, cmt.date_creation, cmt.body, cmt.status, cmt.in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT as cmt left join (SELECT min(date_creation) as mindatecreation, id_bibrec FROM cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0 GROUP BY id_bibrec) as grp on cmt.id_bibrec = grp.id_bibrec WHERE id_user=%s AND star_score = 0 ORDER BY grp.mindatecreation ASC"
    if selected_display_number_option.isdigit():
        # Paginate: enforce a minimum page size of 5, then build the
        # LIMIT clause and total page count
        selected_display_number_option_as_int = int(selected_display_number_option)
        if selected_display_number_option_as_int < 5:
            selected_display_number_option_as_int = 5
        selected_display_number_option = str(selected_display_number_option_as_int)
        from_index = (page_number - 1) * int(selected_display_number_option)
        query_params += ' LIMIT ' + \
                        str(from_index) + \
                        ',' + \
                        str(int(selected_display_number_option))
        # nb_total_pages stays 0 when displaying 'all' (no pagination)
        nb_total_pages = int(math.ceil(float(nb_total_results) / selected_display_number_option_as_int))
    if selected_order_by_option in ("grlf", "grof"):
        # Grouped queries reference the uid twice (subquery + outer query)
        res = run_sql(query + query_params, (user_info['uid'], user_info['uid']))
    else:
        res = run_sql("SELECT id_bibrec, id, date_creation, body, status, in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT WHERE id_user=%s AND star_score = 0" + query_params, (user_info['uid'], ))
    return webcomment_templates.tmpl_your_comments(user_info, res,
                                                   page_number=page_number,
                                                   selected_order_by_option=selected_order_by_option,
                                                   selected_display_number_option=selected_display_number_option,
                                                   selected_display_format_option=selected_display_format_option,
                                                   nb_total_results=nb_total_results,
                                                   nb_total_pages=nb_total_pages,
                                                   ln=ln)
diff --git a/invenio/modules/deposit/tasks.py b/invenio/modules/deposit/tasks.py
index 23bd532f1..d1f03431c 100644
--- a/invenio/modules/deposit/tasks.py
+++ b/invenio/modules/deposit/tasks.py
@@ -1,263 +1,263 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2012, 2013 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
"""
import os
from tempfile import mkstemp
from flask import current_app, abort
from flask.ext.login import current_user
from invenio.bibtask import task_low_level_submission, \
bibtask_allocate_sequenceid
from invenio.bibfield_jsonreader import JsonReader
from invenio.config import CFG_TMPSHAREDDIR
from invenio.dbquery import run_sql
from invenio.modules.deposit.models import Deposition, Agent, \
DepositionDraftCacheManager
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
try:
from invenio.pidstore_model import PersistentIdentifier
HAS_PIDSUPPORT = True
except ImportError:
HAS_PIDSUPPORT = False
def authorize_user(action, **params):
    """
    Workflow task factory: abort with 401 unless the current user is
    authorized to perform ``action``.

    Callable values in ``params`` are resolved at check time, so the
    authorization arguments can depend on the workflow state.
    """
    def _check_authorization(obj, dummy_eng):
        from invenio.access_control_engine import acc_authorize_action
        resolved = {}
        for key, value in params.items():
            resolved[key] = value() if callable(value) else value
        status, message = acc_authorize_action(
            current_user.get_id(), action, **resolved
        )
        if status != 0:
            current_app.logger.info(message)
            abort(401)
    return _check_authorization
def prefill_draft(form_class, draft_id='_default', clear=True):
    """
    Workflow task factory: copy cached pre-fill values, if any, into the
    named draft of the current deposition.
    """
    def _fill_from_cache(obj, eng):
        cache = DepositionDraftCacheManager.get()
        if not cache.has_data():
            # Nothing cached - leave the draft untouched.
            return
        deposition = Deposition(obj)
        cache.fill_draft(
            deposition, draft_id, form_class=form_class, clear=clear
        )
        deposition.update()
    return _fill_from_cache
def render_form(form_class, draft_id='_default'):
    """
    Workflow task factory: halt the engine and render ``form_class``
    unless the draft named ``draft_id`` is already completed.

    :param form_class: The form class which should be rendered.
    :param draft_id: The name of the draft to create. Must be specified if
        you put more than two ``render_form``'s in your deposition workflow.
    """
    def _render_form(obj, eng):
        deposition = Deposition(obj)
        draft = deposition.get_or_create_draft(
            draft_id, form_class=form_class
        )
        if draft.is_completed():
            # Form already submitted - skip straight to the next task.
            eng.jumpCallForward(1)
            return
        form = draft.get_form(validate_draft=draft.validate)
        form.validate = True
        if deposition.type.is_default():
            deposition_type = None
        else:
            deposition_type = deposition.type.get_identifier()
        deposition.set_render_context(dict(
            template_name_or_list=form.get_template(),
            deposition=deposition,
            deposition_type=deposition_type,
            uuid=deposition.id,
            draft=draft,
            form=form,
            my_depositions=Deposition.get_depositions(
                current_user, type=deposition.type
            ),
        ))
        deposition.update()
        eng.halt('Wait for form submission.')
    return _render_form
def create_recid():
    """
    Workflow task factory: reserve a fresh ``bibrec`` row id for the
    latest open submission information package, unless one is set.
    """
    def _create_recid(obj, dummy_eng):
        deposition = Deposition(obj)
        sip = deposition.get_latest_sip(include_sealed=False)
        if sip is None:
            raise Exception("No submission information package found.")
        if 'recid' not in sip.metadata:
            # run_sql returns the auto-increment id of the new row.
            sip.metadata['recid'] = run_sql(
                "INSERT INTO bibrec (creation_date, modification_date) "
                "VALUES (NOW(), NOW())"
            )
        deposition.update()
    return _create_recid
def mint_pid(pid_field='doi', pid_creator=None, pid_store_type='doi',
             existing_pid_checker=None):
    """
    Register a persistent identifier internally.

    :param pid_field: The recjson key for where to look for a pre-reserved
        pid. Defaults to 'doi'.
    :param pid_creator: Callable taking one argument (the recjson) that when
        called will generate and return a pid string.
    :param pid_store_type: The PID store type. Defaults to 'doi'.
    :param existing_pid_checker: A callable taking two arguments
        (pid_str, recjson) that will check if a pid found using ``pid_field``
        should be registered or not.
    """
    if not HAS_PIDSUPPORT:
        # PID store is an optional dependency - degrade to a no-op task.
        def _mint_pid_dummy(dummy_obj, dummy_eng):
            pass
        return _mint_pid_dummy

    def _mint_pid(obj, dummy_eng):
        d = Deposition(obj)
        recjson = d.get_latest_sip(include_sealed=False).metadata

        if 'recid' not in recjson:
            raise Exception("'recid' not found in sip metadata.")

        pid_text = None
        pid = recjson.get(pid_field, None)

        if not pid:
            # No pid found in recjson, so create a new pid with the user
            # supplied function.  (Fix: the old code logged "Registering
            # pid None" here, before pid_text was assigned; registration
            # is logged once below when the pid is actually known.)
            pid_text = recjson[pid_field] = pid_creator(recjson)
        else:
            # Pid found - check if it should be minted.
            if existing_pid_checker and existing_pid_checker(pid, recjson):
                pid_text = pid

        # Create and assign pid internally - actual registration will
        # happen asynchronously later.
        if pid_text:
            current_app.logger.info("Registering pid %s" % pid_text)
            pid_obj = PersistentIdentifier.create(pid_store_type, pid_text)
            if pid_obj is None:
                # Already reserved - fetch the existing identifier object.
                pid_obj = PersistentIdentifier.get(pid_store_type, pid_text)
            try:
                pid_obj.assign("rec", recjson['recid'])
            except Exception:
                register_exception(alert_admin=True)
        d.update()
    return _mint_pid
def prepare_sip():
    """
    Workflow task factory: ensure the deposition has an open submission
    information package and move its file metadata from 'files' to 'fft'.
    """
    def _prepare_sip(obj, dummy_eng):
        deposition = Deposition(obj)
        sip = deposition.get_latest_sip(include_sealed=False)
        if sip is None:
            sip = deposition.create_sip()
        # bibupload expects the file information under the 'fft' key.
        sip.metadata['fft'] = sip.metadata.pop('files')
        sip.agents = [Agent(role='creator', from_request_context=True)]
        deposition.update()
    return _prepare_sip
def finalize_record_sip():
    """
    Workflow task factory: serialize the SIP metadata to MARC and store
    the result as the SIP package.
    """
    def _finalize_sip(obj, dummy_eng):
        deposition = Deposition(obj)
        sip = deposition.get_latest_sip(include_sealed=False)
        reader = JsonReader()
        for key, value in sip.metadata.items():
            reader[key] = value
        sip.package = reader.legacy_export_as_marc()
        # Log conversion problems and the generated MARC for debugging.
        current_app.logger.info(reader['__error_messages'])
        current_app.logger.info(sip.package)
        deposition.update()
    return _finalize_sip
def upload_record_sip():
    """
    Workflow task factory: seal the latest SIP and schedule a bibupload
    task for its MARC package.

    The MARC must already have been generated (see ``finalize_record_sip``)
    before the returned task runs.
    """
    def create(obj, dummy_eng):
        current_app.logger.info("Upload sip")
        deposition = Deposition(obj)
        sip = deposition.get_latest_sip(include_sealed=False)
        sip.seal()
        # Dump the MARCXML package to a shared temp file for bibupload.
        fd, marcxml_path = mkstemp(
            prefix="webdeposit-%s-%s" % (deposition.id, sip.uuid),
            suffix='.xml',
            dir=CFG_TMPSHAREDDIR,
        )
        os.write(fd, sip.package)
        os.close(fd)
        # Trick to have access to task_sequence_id in subsequent tasks.
        deposition.workflow_object.task_sequence_id = \
            bibtask_allocate_sequenceid()
        # '-r' replaces an existing record, '-i' inserts a new one.
        mode = '-r' if 'recid' in sip.metadata else '-i'
        task_low_level_submission(
            'bibupload', 'webdeposit', mode, marcxml_path, '-P5',
            '-I', str(deposition.workflow_object.task_sequence_id)
        )
        deposition.update()
    return create
diff --git a/invenio/modules/encoder/profiles.py b/invenio/modules/encoder/profiles.py
index 646399c00..0fdc061f7 100644
--- a/invenio/modules/encoder/profiles.py
+++ b/invenio/modules/encoder/profiles.py
@@ -1,98 +1,98 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" BibEncode profile submodule """
import os
import shutil
from invenio.utils.json import json_decode_file
from invenio.modules.encoder.config import (
CFG_BIBENCODE_PROFILES_ENCODING,
CFG_BIBENCODE_PROFILES_EXTRACT,
CFG_BIBENCODE_PROFILES_ENCODING_LOCAL,
CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL
)
from invenio.modules.encoder.utils import getval
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
#-------------------#
# Encoding profiles #
#-------------------#
def get_encoding_profiles():
    """Return the merged (shipped defaults + local) encoding profiles."""
    if not os.path.exists(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL):
        # First run: seed the local profile file from the shipped defaults.
        shutil.copy(CFG_BIBENCODE_PROFILES_ENCODING,
                    CFG_BIBENCODE_PROFILES_ENCODING_LOCAL)
    profiles = json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING)
    # Local definitions override the shipped ones.
    profiles.update(json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL))
    return profiles
def get_encoding_profile(key):
    """
    Return a dictionary representation of an encoding profile by key.

    Bitrate values given as strings with a 'k' suffix (e.g. "600k") are
    converted to plain integers in bit/s; integer values pass through.
    """
    profile = get_encoding_profiles()[key]

    def san_bitrate(bitrate):
        """Sanitize a bitrate value to an int in bit/s.

        :raises ValueError: if the value is neither an int nor a string
            of the form '<number>k'.
        """
        if isinstance(bitrate, int):
            # Fix: the original compared ``type(int) == type(bitrate)``,
            # which is False for actual ints (type(int) is the metaclass),
            # so integer bitrates wrongly hit the error branch.
            return bitrate
        if isinstance(bitrate, str) and bitrate.endswith('k'):
            try:
                return int(bitrate[:-1]) * 1000
            except ValueError:
                register_exception(alert_admin=True)
                raise
        # Fix: the original used a bare ``raise`` here with no active
        # exception, which itself raises the wrong error; raise an
        # explicit, descriptive one instead.
        register_exception(alert_admin=True)
        raise ValueError("Invalid bitrate value: %r" % (bitrate,))

    if getval(profile, 'videobitrate'):
        profile['videobitrate'] = san_bitrate(getval(profile, 'videobitrate'))
    if getval(profile, 'audiobitrate'):
        profile['audiobitrate'] = san_bitrate(getval(profile, 'audiobitrate'))
    return profile
#---------------------#
# Extraction profiles #
#---------------------#
def get_extract_profiles():
    """Return the merged (shipped defaults + local) frame extraction
    profiles as a dictionary."""
    if not os.path.exists(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL):
        # First run: seed the local profile file from the shipped defaults.
        shutil.copy(CFG_BIBENCODE_PROFILES_EXTRACT,
                    CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL)
    profiles = json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT)
    # Local definitions override the shipped ones.
    profiles.update(json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL))
    return profiles
def get_extract_profile(key):
    """Return a dictionary representation of an extraction profile by key."""
    profiles = get_extract_profiles()
    return profiles[key]
diff --git a/invenio/modules/error_log/__init__.py b/invenio/modules/error_log/__init__.py
deleted file mode 100644
index eb537e874..000000000
--- a/invenio/modules/error_log/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# -*- coding: utf-8 -*-
-##
-## This file is part of Invenio.
-## Copyright (C) 2013 CERN.
-##
-## Invenio is free software; you can redistribute it and/or
-## modify it under the terms of the GNU General Public License as
-## published by the Free Software Foundation; either version 2 of the
-## License, or (at your option) any later version.
-##
-## Invenio is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Invenio; if not, write to the Free Software Foundation, Inc.,
-## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
diff --git a/invenio/modules/error_log/models.py b/invenio/modules/error_log/models.py
deleted file mode 100644
index ab2f359fa..000000000
--- a/invenio/modules/error_log/models.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-## This file is part of Invenio.
-## Copyright (C) 2011, 2012 CERN.
-##
-## Invenio is free software; you can redistribute it and/or
-## modify it under the terms of the GNU General Public License as
-## published by the Free Software Foundation; either version 2 of the
-## License, or (at your option) any later version.
-##
-## Invenio is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Invenio; if not, write to the Free Software Foundation, Inc.,
-## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-
-"""
-errorlib database models.
-"""
-
-# General imports.
-from invenio.ext.sqlalchemy import db
-
-# Create your models here.
-
-class HstEXCEPTION(db.Model):
- """Represents a HstEXCEPTION record."""
- __tablename__ = 'hstEXCEPTION'
- id = db.Column(db.Integer(15, unsigned=True), nullable=False,
- primary_key=True, autoincrement=True)
- name = db.Column(db.String(50), nullable=False)
- filename = db.Column(db.String(255), nullable=True)
- line = db.Column(db.Integer(9), nullable=True)
- last_seen = db.Column(db.DateTime, nullable=False,
- server_default='1900-01-01 00:00:00', index=True)
- last_notified = db.Column(db.DateTime, nullable=False,
- server_default='1900-01-01 00:00:00', index=True)
- counter = db.Column(db.Integer(15), nullable=False,
- server_default='0')
- total = db.Column(db.Integer(15), nullable=False,
- server_default='0', index=True)
-
- __table_args__ = (db.Index('name', name, filename, line, unique=True),
- db.Model.__table_args__)
-
-
-__all__ = ['HstEXCEPTION']
diff --git a/invenio/modules/formatter/__init__.py b/invenio/modules/formatter/__init__.py
index 199b26bb2..35a25ad55 100644
--- a/invenio/modules/formatter/__init__.py
+++ b/invenio/modules/formatter/__init__.py
@@ -1,662 +1,662 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Format records using chosen format.
The main APIs are:
- format_record
- format_records
- create_excel
- get_output_format_content_type
This module wraps the BibFormat engine and its associated
functions. This is also where special formatting functions of multiple
records (that the engine does not handle, as it works on a single
record basis) should be defined, with name C{def create_*}.
@see: bibformat_utils.py
"""
__revision__ = "$Id$"
import getopt
import sys
import zlib
from invenio.base.globals import cfg
-from invenio.errorlib import register_exception
from invenio.messages import wash_language
+from invenio.ext.logging import register_exception
from invenio.ext.template import render_template_to_string
# Functions to format a single record
##
def format_record(recID, of, ln=None, verbose=0, search_pattern=None,
                  xml_record=None, user_info=None, on_the_fly=False):
    """
    Format a record in given output format.

    Return a formatted version of the record in the specified
    language, search pattern, and with the specified output format.
    The function will define which format template must be applied.

    The record to be formatted can be specified with its ID (with
    'recID' parameter) or given as XML representation (with
    'xml_record' parameter). If 'xml_record' is specified 'recID' is
    ignored (but should still be given for reference. A dummy recid 0
    or -1 could be used).

    'user_info' allows to grant access to some functionalities on a
    page depending on the user's priviledges. The 'user_info' object
    makes sense only in the case of on-the-fly formatting. 'user_info'
    is the same object as the one returned by
    'webuser.collect_user_info(req)'

    @param recID: the ID of record to format.
    @type recID: int
    @param of: an output format code (or short identifier for the output format)
    @type of: string
    @param ln: the language to use to format the record
    @type ln: string
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings, stop if error in format elements
                                                        9: errors and warnings, stop if error (debug mode ))
    @type verbose: int
    @param search_pattern: list of strings representing the user request in web interface
    @type search_pattern: list(string)
    @param xml_record: an xml string represention of the record to format
    @type xml_record: string or None
    @param user_info: the information of the user who will view the formatted page (if applicable)
    @param on_the_fly: if False, try to return an already preformatted version of the record in the database
    @type on_the_fly: boolean
    @return: formatted record
    @rtype: string
    """
    ln = ln or cfg['CFG_SITE_LANG']
    # Imported here to avoid a circular import at module load time.
    from invenio.search_engine import record_exists
    if search_pattern is None:
        search_pattern = []

    out = ""

    if verbose == 9:
        out += """\n<span class="quicknote">
Formatting record %i with output format %s.
</span>""" % (recID, of)
    ############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
    if cfg['CFG_BIBFORMAT_USE_OLD_BIBFORMAT'] and cfg['CFG_PATH_PHP']:
        from . import engine as bibformat_engine
        return bibformat_engine.call_old_bibformat(recID, of=of, on_the_fly=on_the_fly)
    ############################# END ##################################

    # Cache lookup is only attempted when the cached copy could be valid:
    # site language (other languages are never cached), 'xm' (language
    # independent), or formats explicitly exempted from i18n caching.
    if not on_the_fly and \
       (ln == cfg['CFG_SITE_LANG'] or \
        of.lower() == 'xm' or \
        cfg['CFG_BIBFORMAT_USE_OLD_BIBFORMAT'] or \
        (of.lower() in cfg['CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS'])) and \
       record_exists(recID) != -1:
        # Try to fetch preformatted record. Only possible for records
        # formatted in CFG_SITE_LANG language (other are never
        # stored), or of='xm' which does not depend on language.
        # Exceptions are made for output formats defined in
        # CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS, which are
        # always served from the same cache for any language. Also,
        # do not fetch from DB when record has been deleted: we want
        # to return an "empty" record in that case
        from . import api
        res = api.get_preformatted_record(recID, of)
        if res is not None:
            # record 'recID' is formatted in 'of', so return it
            if verbose == 9:
                last_updated = api.get_preformatted_record_date(recID, of)
                out += """\n<br/><span class="quicknote">
Found preformatted output for record %i (cache updated on %s).
</span><br/>""" % (recID, last_updated)
            if of.lower() == 'xm':
                # Strip restricted MARC tags before exposing raw XML.
                res = filter_hidden_fields(res, user_info)
            # try to replace language links in pre-cached res, if applicable:
            if ln != cfg['CFG_SITE_LANG'] and of.lower() in cfg['CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS']:
                # The following statements try to quickly replace any
                # language arguments in URL links. Not an exact
                # science, but should work most of the time for most
                # of the formats, with not too many false positives.
                # We don't have time to parse output much here.
                res = res.replace('?ln=' + cfg['CFG_SITE_LANG'], '?ln=' + ln)
                res = res.replace('&ln=' + cfg['CFG_SITE_LANG'], '&ln=' + ln)
                res = res.replace('&amp;ln=' + cfg['CFG_SITE_LANG'], '&amp;ln=' + ln)
            out += res
            return out
        else:
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
No preformatted output found for record %s.
</span>"""% recID

    # Live formatting of records in all other cases
    if verbose == 9:
        out += """\n<br/><span class="quicknote">
Formatting record %i on-the-fly.
</span>""" % recID
    try:
        from . import engine as bibformat_engine
        out += bibformat_engine.format_record(recID=recID,
                                              of=of,
                                              ln=ln,
                                              verbose=verbose,
                                              search_pattern=search_pattern,
                                              xml_record=xml_record,
                                              user_info=user_info)
        if of.lower() == 'xm':
            out = filter_hidden_fields(out, user_info)
        return out
    except Exception, e:
        register_exception(prefix="An error occured while formatting record %i in %s" % \
                           (recID, of),
                           alert_admin=True)
        # Failsafe execution mode: fall back to the plain websearch
        # templates so the page still renders something for the user.
        import invenio.template
        websearch_templates = invenio.template.load('websearch')
        if verbose == 9:
            out += """\n<br/><span class="quicknote">
An error occured while formatting record %i. (%s)
</span>""" % (recID, str(e))
        if of.lower() == 'hd':
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
Formatting record %i with websearch_templates.tmpl_print_record_detailed.
</span><br/>""" % recID
            return out + websearch_templates.tmpl_print_record_detailed(
                ln = ln,
                recID = recID,
            )
        if verbose == 9:
            out += """\n<br/><span class="quicknote">
Formatting record %i with websearch_templates.tmpl_print_record_brief.
</span><br/>""" % recID
        return out + websearch_templates.tmpl_print_record_brief(ln=ln,
                                                                 recID=recID,
                                                                 )
def record_get_xml(recID, format='xm', decompress=zlib.decompress):
    """
    Return an XML string of the record given by recID.

    The XML is built directly from the database, bypassing the standard
    formatting process.  ``format`` selects the flavour of XML:
      - 'xm' for standard XML
      - 'marcxml' for MARC XML
      - 'oai_dc' for OAI Dublin Core
      - 'xd' for XML Dublin Core
    An empty string is returned when the record does not exist.

    @param recID: the id of the record to retrieve
    @param format: the format to use
    @param decompress: the library to use to decompress cache from DB
    @return: the xml string of the record
    """
    from . import utils as bibformat_utils
    return bibformat_utils.record_get_xml(
        recID=recID, format=format, decompress=decompress
    )
# Helper functions to do complex formatting of multiple records
#
# You should not modify format_records when adding a complex
# formatting of multiple records, but add a create_* method
# that relies on format_records to do the formatting.
##
def format_records(recIDs, of, ln=None, verbose=0, search_pattern=None,
                   xml_records=None, user_info=None, record_prefix=None,
                   record_separator=None, record_suffix=None, prologue="",
                   epilogue="", req=None, on_the_fly=False):
    """
    Format a list of records given by record IDs or by XML strings.

    Each record is wrapped with an optional prefix and suffix, records
    are joined with an optional separator, and the whole output is
    wrapped in ``prologue``/``epilogue``.  ``record_prefix``,
    ``record_suffix`` and ``record_separator`` may each be a plain
    string or a callable taking the record index and returning a string
    (``separator(i)`` is placed between record i and record i+1).

    Specify either ``recIDs`` or ``xml_records``; if both are given,
    ``recIDs`` is ignored.  If ``req`` is given, output is additionally
    streamed to it piece by piece (set the content-type and send HTTP
    headers yourself beforehand).  The remaining parameters are passed
    through to ``format_record``.

    @param recIDs: a list of record IDs
    @param of: an output format code (or short identifier)
    @param ln: the language to use to format the records
    @param verbose: verbosity level from 0 to 9
    @param search_pattern: list of strings representing the user request
    @param xml_records: a list of XML string representations to format
    @param user_info: information about the viewing user (if applicable)
    @param prologue: string printed once before all records
    @param epilogue: string printed once after all records
    @param req: optional request object to stream output to
    @param on_the_fly: if False, allow serving preformatted cache
    @rtype: string
    """
    if req is not None:
        req.write(prologue)

    formatted_records = ''

    # Exactly one of recIDs / xml_records drives the iteration; pad the
    # other with Nones so both can be indexed in lockstep.
    if xml_records is not None:
        recIDs = map(lambda x: None, xml_records)
    else:
        xml_records = map(lambda x: None, recIDs)

    total_rec = len(recIDs)

    def _emit(decoration, index):
        """Resolve a prefix/suffix/separator (string or callable),
        stream it to ``req`` if given, and return it."""
        if not isinstance(decoration, str):
            decoration = decoration(index)
        if req is not None:
            req.write(decoration)
        return decoration

    for i in range(total_rec):
        if record_prefix is not None:
            formatted_records += _emit(record_prefix, i)

        # Format the record itself.
        ln = ln or cfg['CFG_SITE_LANG']
        formatted_record = format_record(recIDs[i], of, ln, verbose,
                                         search_pattern, xml_records[i],
                                         user_info, on_the_fly)
        formatted_records += formatted_record
        if req is not None:
            req.write(formatted_record)

        if record_suffix is not None:
            formatted_records += _emit(record_suffix, i)

        # Separator goes between records only, never after the last one.
        if record_separator is not None and i != total_rec - 1:
            formatted_records += _emit(record_separator, i)

    if req is not None:
        req.write(epilogue)

    return prologue + formatted_records + epilogue
def create_excel(recIDs, req=None, ln=None, ot=None, ot_sep="; ", user_info=None):
    """
    Returns an Excel readable format containing the given recIDs.
    If 'req' is given, also prints the output in 'req' while individual
    records are being formatted.

    This method shows how to create a custom formatting of multiple
    records.
    The excel format is a basic HTML table that most spreadsheets
    applications can parse.

    If 'ot' is given, the BibFormat engine is overridden and the
    output is produced on the basis of the fields that 'ot' defines
    (see search_engine.perform_request_search(..) 'ot' param).

    @param req: the request object
    @param recIDs: a list of record IDs
    @param ln: language
    @param ot: a list of fields that should be included in the excel output as columns(see perform_request_search 'ot' param)
    @param ot_sep: a separator used to separate values for the same record, in the same columns, if any
    @param user_info: the user_info dictionary
    @return: a string in Excel format
    """
    from . import utils as bibformat_utils
    # Prepare the column headers to display in the Excel file
    column_headers_list = ['Title',
                           'Authors',
                           'Addresses',
                           'Affiliation',
                           'Date',
                           'Publisher',
                           'Place',
                           'Abstract',
                           'Keywords',
                           'Notes']
    # Prepare Content: each header cell carries inline styling so that
    # spreadsheet applications render a black header row.
    column_headers = '</b></td><td style="border-color:black; border-style:solid; border-width:thin; background-color:black;color:white"><b>'.join(column_headers_list) + ''
    column_headers = '<table style="border-collapse: collapse;">\n'+ '<td style="border-color:black; border-style:solid; border-width:thin; background-color:black;color:white"><b>' + column_headers + '</b></td>'
    footer = '</table>'
    # Apply content_type and print column headers
    if req is not None:
        req.content_type = get_output_format_content_type('excel')
        req.headers_out["Content-Disposition"] = "inline; filename=%s" % 'results.xls'
        req.send_http_header()
    if ot is not None and len(ot) > 0:
        # Skip BibFormat engine, produce our own output based on
        # specified fields. Each field will be a column of the
        # output. If a field has multiple values, then they are joined
        # into the same cell.
        out = "<table>"
        if req: req.write("<table>")
        for recID in recIDs:
            # First column links back to the record page.
            row = '<tr>'
            row += '<td><a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recID)i">%(recID)i</a></td>' % \
                   {'recID': recID, 'CFG_SITE_RECORD': cfg['CFG_SITE_RECORD'],
                    'CFG_SITE_URL': cfg['CFG_SITE_URL']}
            for field in ot:
                # Multiple values of the same field share one cell,
                # joined by ot_sep.
                row += '<td>' + \
                       ot_sep.join(bibformat_utils.get_all_fieldvalues(recID, field)) + \
                       '</td>'
            row += '</tr>'
            out += row
            if req: req.write(row)
        out += '</table>'
        if req: req.write('</table>')
        return out
    # Format the records through the regular engine using the 'excel'
    # output format; the HTML-table prologue/epilogue wrap the result.
    excel_formatted_records = format_records(recIDs, 'excel',
                                             ln=ln or cfg['CFG_SITE_LANG'],
                                             record_separator='\n',
                                             prologue = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><table>',
                                             epilogue = footer,
                                             req=req,
                                             user_info=user_info)
    return excel_formatted_records
# Utility functions
##
def make_filter_line(hide_tag):
    """
    Build the substring used to match a MARCXML datafield opening line
    for the given hidden tag specification.

    ``hide_tag`` is a three-character MARC tag (e.g. '595'), optionally
    followed by one-character indicators; '_' stands for a blank
    indicator.
    """
    spec = str(hide_tag)
    tag, ind1, ind2 = spec[:3], spec[3:4], spec[4:5]
    # '_' in the spec denotes a blank indicator in the XML.
    if ind1 == "_":
        ind1 = " "
    if ind2 == "_":
        ind2 = " "
    if not (ind1 or ind2):
        return 'datafield tag="%s"' % tag
    if ind1 and not ind2:
        return 'datafield tag="%s" ind1="%s"' % (tag, ind1)
    return 'datafield tag="%s" ind1="%s" ind2="%s"' % (tag, ind1, ind2)
def filter_hidden_fields(recxml, user_info=None, filter_tags=None,
                         force_filtering=False):
    """
    Filter out tags specified by filter_tags from MARCXML.  If the user
    is allowed to run bibedit, then filter nothing, unless
    force_filtering is set to True.

    @param recxml: marcxml presentation of the record
    @param user_info: user information; if None, then assume invoked via CLI with all rights
    @param filter_tags: list of MARC tags to be filtered
    @param force_filtering: do we force filtering regardless of user rights?
    @return: recxml without the hidden fields
    """
    filter_tags = filter_tags or cfg['CFG_BIBFORMAT_HIDDEN_TAGS']
    if not force_filtering:
        if user_info is None:
            # CLI invocation: caller has full rights, nothing to hide.
            return recxml
        from invenio.access_control_engine import acc_authorize_action
        if acc_authorize_action(user_info, 'runbibedit')[0] == 0:
            # User may edit records - no need to filter.
            return recxml
    # Drop every datafield block whose opening line matches one of the
    # hidden tag patterns; a block ends at its closing </datafield>.
    filter_lines = [make_filter_line(tag) for tag in filter_tags]
    kept = []
    omit = False
    for line in recxml.splitlines(True):
        # check if this block needs to be omitted
        for hidden in filter_lines:
            if line.count(hidden):
                omit = True
        if not omit:
            kept.append(line)
        if omit and line.count('</datafield>'):
            omit = False
    return ''.join(kept)
def get_output_format_content_type(of, default_content_type="text/html"):
    """
    Return the content type (e.g. 'text/html' or 'application/ms-excel')
    of the given output format.

    @param of: the code of output format for which we want to get the content type
    @param default_content_type: default content-type when content-type was not set up
    @return: the content-type to use for this output format
    """
    from . import api
    content_type = api.get_output_format_content_type(of)
    return content_type if content_type != '' else default_content_type
def print_records(recIDs, of='hb', ln=None, verbose=0,
                  search_pattern='', on_the_fly=False, **ctx):
    """
    Render the given record ids through the matching Jinja template and
    return the resulting string.
    """
    import time
    from math import ceil
    from flask import request
    from invenio.modules.formatter.engine import format_record, \
        TEMPLATE_CONTEXT_FUNCTIONS_CACHE
    from invenio.modules.search.models import Format
    from invenio.utils.pagination import Pagination

    of = of.lower()
    # Request arguments take precedence over the keyword context.
    jrec = request.values.get('jrec', ctx.get('jrec', 1), type=int)
    rg = request.values.get('rg', ctx.get('rg', 10), type=int)
    ln = ln or wash_language(request.values.get('ln', cfg['CFG_SITE_LANG']))
    if rg > 0:
        pages = int(ceil(jrec / float(rg)))
    else:
        pages = 1

    context = {
        'of': of, 'jrec': jrec, 'rg': rg, 'ln': ln,
        'facets': {},
        'time': time,
        'recids': recIDs,
        'pagination': Pagination(pages, rg, ctx.get('records', len(recIDs))),
        'verbose': verbose,
        'export_formats': Format.get_export_formats(),
        'format_record': format_record,
    }
    context.update(TEMPLATE_CONTEXT_FUNCTIONS_CACHE.template_context_functions)
    # Caller-supplied context wins over the defaults above.
    context.update(ctx)

    # Try the most specific template first, then fall back.
    candidate_templates = [
        'format/records/%s.tpl' % of,
        'format/records/%s.tpl' % of[0],
        'format/records/%s.tpl' % (
            get_output_format_content_type(of).replace('/', '_')),
    ]
    return render_template_to_string(candidate_templates, **context)
def usage(exitcode=1, msg=""):
    """
    Prints usage info for the bibformat command-line tool and exits.

    @param exitcode: exit code to use (eg. 1 for error, 0 for okay)
    @param msg: optional error message, written to stderr before the help text
    @return: does not return; exits the process with 'exitcode'
    """
    if msg:
        sys.stderr.write("Error: %s.\n" % msg)
    # Python 2 print statement: emits the help text plus a trailing newline.
    print """BibFormat: outputs the result of the formatting of a record.
Usage: bibformat required [options]
Examples:
  $ bibformat -i 10 -o HB
  $ bibformat -i 10,11,13 -o HB
  $ bibformat -i 10:13
  $ bibformat -i 10 -o HB -v 9
Required:
 -i, --id=ID[ID2,ID3:ID5] ID (or range of IDs) of the record(s) to be formatted.
Options:
 -o, --output=CODE short code of the output format used for formatting (default HB).
 -l, --lang=LN language used for formatting.
 -y, --onthefly on-the-fly formatting, avoiding caches created by BibReformat.
General options:
 -h, --help print this help and exit
 -v, --verbose=LEVEL verbose level (from 0 to 9, default 0)
 -V --version print the script version
"""
    sys.exit(exitcode)
def main():
    """
    Main entry point for bibformat via command line.

    Parses the command-line options, then prints the formatted
    record(s) as specified by the options, or help/error messages.

    @return: None (may exit the process via usage() on bad arguments)
    """
    options = {}  # will hold command-line options
    options["verbose"] = 0
    options["onthefly"] = False
    options["lang"] = CFG_SITE_LANG
    options["output"] = "HB"
    options["recID"] = None
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   "hVv:yl:i:o:",
                                   ["help",
                                    "version",
                                    "verbose=",
                                    "onthefly",
                                    "lang=",
                                    "id=",
                                    "output="])
    except getopt.GetoptError as err:
        # usage() exits the process, so no fall-through happens here.
        usage(1, err)
    try:
        for opt in opts:
            if opt[0] in ["-h", "--help"]:
                usage(0)
            elif opt[0] in ["-V", "--version"]:
                sys.stdout.write("%s\n" % __revision__)
                sys.exit(0)
            elif opt[0] in ["-v", "--verbose"]:
                options["verbose"] = int(opt[1])
            elif opt[0] in ["-y", "--onthefly"]:
                options["onthefly"] = True
            elif opt[0] in ["-l", "--lang"]:
                options["lang"] = opt[1]
            elif opt[0] in ["-i", "--id"]:
                recIDs = []
                for recID in opt[1].split(','):
                    if ":" in recID:
                        start = int(recID.split(':')[0])
                        end = int(recID.split(':')[1])
                        # NOTE(review): range(start, end) excludes 'end',
                        # so '-i 10:13' formats records 10..12 — confirm
                        # this historical behaviour is intended.
                        recIDs.extend(range(start, end))
                    else:
                        recIDs.append(int(recID))
                options["recID"] = recIDs
            elif opt[0] in ["-o", "--output"]:
                options["output"] = opt[1]
        if options["recID"] is None:
            usage(1, "-i argument is needed")
    except StandardError as e:
        # Fixed: pass the message as 'msg', not as the exit code
        # (usage(e) previously fed the exception object to sys.exit()).
        usage(1, e)
    sys.stdout.write("%s\n" % format_records(recIDs=options["recID"],
                                             of=options["output"],
                                             ln=options["lang"],
                                             verbose=options["verbose"],
                                             on_the_fly=options["onthefly"]))
    return
# Run the CLI entry point only when executed as a script.
if __name__ == "__main__":
    main()
diff --git a/invenio/modules/formatter/engine.py b/invenio/modules/formatter/engine.py
index 4ca9d8ab5..5bbd0c0cd 100644
--- a/invenio/modules/formatter/engine.py
+++ b/invenio/modules/formatter/engine.py
@@ -1,2243 +1,2243 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Formats a single XML Marc record using specified format.
There is no API for the engine. Instead use module L{bibformat}.
You can have a look at the various escaping modes available in
X{BibFormatObject} in function L{escape_field}
Still it is useful sometimes for debugging purpose to use the
L{BibFormatObject} class directly. For eg:
>>> from invenio.modules.formatter.engine import BibFormatObject
>>> bfo = BibFormatObject(102)
>>> bfo.field('245__a')
The order Rodentia in South America
>>> from invenio.modules.formatter.format_elements import bfe_title
>>> bfe_title.format_element(bfo)
The order Rodentia in South America
@see: bibformat.py, bibformat_utils.py
"""
__revision__ = "$Id$"
import re
import sys
import os
import inspect
import traceback
import zlib
import cgi
import types
from flask import has_app_context
from operator import itemgetter
from werkzeug.utils import cached_property
from invenio.base.globals import cfg
from invenio.base.utils import (autodiscover_template_context_functions,
autodiscover_format_elements)
from invenio.config import \
CFG_PATH_PHP, \
CFG_BINDIR, \
CFG_SITE_LANG
-from invenio.errorlib import \
+from invenio.ext.logging import \
register_exception
from invenio.legacy.bibrecord import \
create_record, \
record_get_field_instances, \
record_get_field_value, \
record_get_field_values, \
record_xml_output
from invenio.bibformat_xslt_engine import format
from invenio.dbquery import run_sql
from invenio.base.i18n import \
language_list_long, \
wash_language, \
gettext_set_language
from . import api as bibformat_dblayer
from .config import \
CFG_BIBFORMAT_TEMPLATES_DIR, \
CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, \
CFG_BIBFORMAT_FORMAT_JINJA_TEMPLATE_EXTENSION, \
CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, \
CFG_BIBFORMAT_OUTPUTS_PATH, \
InvenioBibFormatError
from invenio.modules.formatter.utils import \
record_get_xml, \
parse_tag
from invenio.htmlutils import \
HTMLWasher, \
CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST, \
CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST
from invenio.bibknowledge import get_kbr_values
from invenio.ext.template import render_template_to_string
from HTMLParser import HTMLParseError
from invenio.shellutils import escape_shell_arg
if CFG_PATH_PHP: #Remove when call_old_bibformat is removed
from xml.dom import minidom
import tempfile
# Cache for data we have already read and parsed
format_templates_cache = {}
format_elements_cache = {}
format_outputs_cache = {}
html_field = '<!--HTML-->' # String indicating that field should be
# treated as HTML (and therefore no escaping of
# HTML tags should occur.
# Appears in some field values.
washer = HTMLWasher() # Used to remove dangerous tags from HTML
# sources
# Regular expression for finding <lang>...</lang> tag in format templates
pattern_lang = re.compile(r'''
<lang #<lang tag (no matter case)
\s* #any number of white spaces
> #closing <lang> start tag
(?P<langs>.*?) #anything but the next group (greedy)
(</lang\s*>) #end tag
''', re.IGNORECASE | re.DOTALL | re.VERBOSE)
# Builds regular expression for finding each known language in <lang> tags
# (all languages, even disabled ones, so legacy templates keep working).
ln_pattern_text = r"<("
for lang in language_list_long(enabled_langs_only=False):
    ln_pattern_text += lang[0] +r"|"
ln_pattern_text = ln_pattern_text.rstrip(r"|")
ln_pattern_text += r")>(.*?)</\1>"
ln_pattern = re.compile(ln_pattern_text, re.IGNORECASE | re.DOTALL)
# Regular expression for finding text to be translated, i.e. _(...)_ markers
translation_pattern = re.compile(r'_\((?P<word>.*?)\)_', \
                                 re.IGNORECASE | re.DOTALL | re.VERBOSE)
# Regular expression for finding <name> tag in format templates
pattern_format_template_name = re.compile(r'''
<name #<name tag (no matter case)
\s* #any number of white spaces
> #closing <name> start tag
(?P<name>.*?) #name value. any char that is not end tag
(</name\s*>)(\n)? #end tag
''', re.IGNORECASE | re.DOTALL | re.VERBOSE)
# Regular expression for finding <description> tag in format templates
pattern_format_template_desc = re.compile(r'''
<description #<decription tag (no matter case)
\s* #any number of white spaces
> #closing <description> start tag
(?P<desc>.*?) #description value. any char that is not end tag
</description\s*>(\n)? #end tag
''', re.IGNORECASE | re.DOTALL | re.VERBOSE)
# Regular expression for finding <BFE_ > tags in format templates
pattern_tag = re.compile(r'''
<BFE_ #every special tag starts with <BFE_ (no matter case)
(?P<function_name>[^/\s]+) #any char but a space or slash
\s* #any number of spaces
(?P<params>(\s* #params here
(?P<param>([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s)
=\s* #equality: = followed by any number of spaces
(?P<sep>[\'"]) #one of the separators
(?P<value>.*?) #param value: any chars that is not a separator like previous one
(?P=sep) #same separator as starting one
)*) #many params
\s* #any number of spaces
(/)?> #end of the tag
''', re.IGNORECASE | re.DOTALL | re.VERBOSE)
# Regular expression for finding params inside <BFE_ > tags in format templates
pattern_function_params = re.compile('''
(?P<param>([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s)
=\s* # Equality: = followed by any number of spaces
(?P<sep>[\'"]) # One of the separators
(?P<value>.*?) # Param value: any chars that is not a separator like previous one
(?P=sep) # Same separator as starting one
''', re.VERBOSE | re.DOTALL )
# Regular expression for finding format elements "params" attributes
# (defined by @param)
pattern_format_element_params = re.compile('''
@param\s* # Begins with AT param keyword followed by space(s)
(?P<name>[^\s=]*):\s* # A single keyword and comma, then space(s)
#(=\s*(?P<sep>[\'"]) # Equality, space(s) and then one of the separators
#(?P<default>.*?) # Default value: any chars that is not a separator like previous one
#(?P=sep) # Same separator as starting one
#)?\s* # Default value for param is optional. Followed by space(s)
(?P<desc>.*) # Any text that is not end of line (thanks to MULTILINE parameter)
''', re.VERBOSE | re.MULTILINE)
# Regular expression for finding format elements "see also" attribute
# (defined by @see)
pattern_format_element_seealso = re.compile('''@see:\s*(?P<see>.*)''',
                                            re.VERBOSE | re.MULTILINE)
#Regular expression for finding 2 expressions in quotes, separated by
#comma (as in template("1st","2nd") )
#Used when parsing output formats
## pattern_parse_tuple_in_quotes = re.compile('''
## (?P<sep1>[\'"])
## (?P<val1>.*)
## (?P=sep1)
## \s*,\s*
## (?P<sep2>[\'"])
## (?P<val2>.*)
## (?P=sep2)
## ''', re.VERBOSE | re.MULTILINE)
# Normalize an arbitrary tag name into a safe identifier:
# lowercase, with every run of non-alphanumerics replaced by '_'.
sub_non_alnum = re.compile('[^0-9a-zA-Z]+')
fix_tag_name = lambda s: sub_non_alnum.sub('_', s.lower())
from invenio.utils.memoise import memoize
class LazyTemplateContextFunctionsCache(object):
    """Loads bibformat elements using plugin builder and caches results."""

    @cached_property
    def template_context_functions(self):
        """Returns template context functions"""
        modules = autodiscover_template_context_functions()
        elem = {}
        for m in modules:
            register_func = getattr(m, 'template_context_function', None)
            if register_func and isinstance(register_func, types.FunctionType):
                # Expose the function under the last dotted component of
                # the module name (e.g. 'pkg.mod.foo' -> 'foo').
                elem[m.__name__.split('.')[-1]] = register_func
        return elem

    @memoize
    def bibformat_elements(self, packages=None):
        """Returns bibformat elements."""
        # A configured import path takes precedence over the argument.
        if cfg['CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH'] is not None:
            packages = [cfg['CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH']]
        modules = autodiscover_format_elements(packages=packages, silent=True)
        elem = {}
        for m in modules:
            name = m.__name__.split('.')[-1]
            # Accept the 'format_element' entry point, falling back to the
            # legacy 'format' one.
            register_func = getattr(m, 'format_element',
                                    getattr(m, 'format', None))
            escape_values = getattr(m, 'escape_values', None)
            if register_func and isinstance(register_func, types.FunctionType):
                # Attach the element's escape policy to the function itself.
                register_func._escape_values = escape_values
                elem[name] = register_func
        return elem

    #@cached_property
    #def bibformat_elements(self):
    #    return self._bibformat_elements()

    @cached_property
    def functions(self):
        # Wrap a legacy format element so it can be called as a Jinja2
        # template context function.
        def insert(name):
            def _bfe_element(bfo, **kwargs):
                # convert to utf-8 for legacy app
                kwargs = dict((k, v.encode('utf-8') if isinstance(v, unicode) else v)
                              for k, v in kwargs.iteritems())
                format_element = get_format_element(name)
                (out, dummy) = eval_format_element(format_element,
                                                   bfo,
                                                   kwargs)
                # returns unicode for jinja2
                return out.decode('utf-8')
            return _bfe_element
        # Old bibformat templates
        tfn_from_files = dict((name.lower(), insert(name.lower()))
                              for name in self.bibformat_elements().keys())
        # Update with new template context functions
        tfn_from_files.update(self.template_context_functions)
        bfe_from_tags = {}
        # Tag-table lookups need a Flask application context.
        if has_app_context():
            from invenio.ext.sqlalchemy import db
            from invenio.modules.search.models import Tag
            # get functions from tag table
            bfe_from_tags = dict(('bfe_'+fix_tag_name(name),
                                  insert(fix_tag_name(name)))
                                 for name in map(itemgetter(0),
                                                 db.session.query(Tag.name).all()))
        # overwrite functions from tag table with functions from files
        bfe_from_tags.update(tfn_from_files)
        return bfe_from_tags
# Shared module-level cache instance used throughout the formatter engine.
TEMPLATE_CONTEXT_FUNCTIONS_CACHE = LazyTemplateContextFunctionsCache()
def call_old_bibformat(recID, of="HD", on_the_fly=False, verbose=0):
    """
    FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE
    Calls BibFormat for the record RECID in the desired output format 'of'.

    Note: this function always tries to return HTML, so when
    bibformat returns XML with embedded HTML format inside the tag
    FMT $g, as is suitable for prestoring output formats, we
    perform un-XML-izing here in order to return HTML body only.

    @param recID: record ID to format
    @param of: output format to be used for formatting
    @param on_the_fly: if False, try to return an already preformatted version of the record in the database
    @param verbose: verbosity
    @return: a formatted output using old BibFormat
    """
    out = ""
    res = []
    if not on_the_fly:
        # Look for an already formatted record in the cache table.
        # Use bound query parameters instead of interpolating values into
        # the SQL string, to rule out SQL injection through 'of'.
        res = run_sql("SELECT value, last_updated FROM bibfmt "
                      "WHERE id_bibrec=%s AND format=%s", (recID, of), 1)
        if res:
            # record 'recID' is formatted in 'of', so return it
            if verbose == 9:
                last_updated = res[0][1]
                out += """\n<br/><span class="quicknote">
Found preformatted output for record %i (cache updated on %s).
</span>""" % (recID, last_updated)
            decompress = zlib.decompress
            return "%s" % decompress(res[0][0])
        else:
            # record 'recID' is not formatted in 'of',
            # so try to call BibFormat on the fly or use default format:
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
Formatting record %i on-the-fly with old BibFormat.
</span><br/>""" % recID
    # Retrieve MARCXML.
    # Build it on-the-fly only if 'call_old_bibformat' was called
    # with format=xm and on_the_fly=True
    xm_record = record_get_xml(recID, 'xm',
                               on_the_fly=(on_the_fly and of == 'xm'))
    # Write the MARCXML to a temp file, run the legacy 'bibformat' binary
    # on it, and read the result back.  The temp fds/files are always
    # closed and removed, even if an intermediate step fails.
    (result_fd, result_path) = tempfile.mkstemp()
    (xm_fd, xm_path) = tempfile.mkstemp()
    try:
        xm_file = open(xm_path, "w")
        try:
            xm_file.write(xm_record)
        finally:
            xm_file.close()
        # 'of' is shell-escaped; both paths come from mkstemp().
        command = "( %s/bibformat otype=%s ) > %s" % \
                  (CFG_BINDIR, escape_shell_arg(of), result_path)
        command = command + " <" + xm_path
        os.system(command)
        result_file = open(result_path, "r")
        try:
            bibformat_output = result_file.read()
        finally:
            result_file.close()
    finally:
        os.close(result_fd)
        os.remove(result_path)
        os.close(xm_fd)
        os.remove(xm_path)
    if bibformat_output.startswith("<record>"):
        # Un-XML-ize: keep only the HTML stored in the $g subfields.
        dom = minidom.parseString(bibformat_output)
        for e in dom.getElementsByTagName('subfield'):
            if e.getAttribute('code') == 'g':
                for t in e.childNodes:
                    out += t.data.encode('utf-8')
    else:
        out += bibformat_output
    return out
def format_record(recID, of, ln=CFG_SITE_LANG, verbose=0,
                  search_pattern=None, xml_record=None, user_info=None, qid=""):
    """
    Formats a record given output format. Main entry function of
    bibformat engine.

    Returns a formatted version of the record in the specified
    language, search pattern, and with the specified output format.
    The function will define which format template must be applied.

    You can either specify a record ID to format, or give its xml
    representation. If 'xml_record' is not None, then use it instead
    of recID.

    'user_info' allows to grant access to some functionalities on a
    page depending on the user's privileges. 'user_info' is the same
    object as the one returned by 'webuser.collect_user_info(req)'

    @param recID: the ID of record to format
    @param of: an output format code (or short identifier for the output format)
    @param ln: the language to use to format the record
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings, stop if error in format elements
                                                        9: errors and warnings, stop if error (debug mode ))
    @param search_pattern: list of strings representing the user request in web interface
    @param xml_record: an xml string representing the record to format
    @param user_info: the information of the user who will view the formatted page
    @param qid: query id, forwarded to the format template evaluation
    @return: formatted record
    """
    if search_pattern is None:
        search_pattern = []
    out = ""
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    # Temporary workflow (during migration of formats):
    # Call new BibFormat
    # But if format not found for new BibFormat, then call old BibFormat
    #Create a BibFormat Object to pass that contain record and context
    bfo = BibFormatObject(recID, ln, search_pattern, xml_record, user_info, of)
    if of.lower() != 'xm' and \
       (not bfo.get_record() or len(bfo.get_record()) <= 1):
        # Record only has recid: do not format, excepted
        # for xm format
        return ""
    #Find out which format template to use based on record and output format.
    template = decide_format_template(bfo, of)
    if verbose == 9 and template is not None:
        out += """\n<br/><span class="quicknote">
Using %s template for record %i.
</span>""" % (template, recID)
    ############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
    path = "%s%s%s" % (cfg['CFG_BIBFORMAT_TEMPLATES_PATH'], os.sep, template)
    # Fall back to the legacy formatter when no usable template exists:
    # either no template was decided, or it is neither a readable file
    # nor a Jinja template (which is resolved by the template engine).
    if template is None or not (
        os.access(path, os.R_OK) or
        template.endswith("." + CFG_BIBFORMAT_FORMAT_JINJA_TEMPLATE_EXTENSION)):
        # template not found in new BibFormat. Call old one
        if verbose == 9:
            if template is None:
                out += """\n<br/><span class="quicknote">
No template found for output format %s and record %i.
(Check invenio.err log file for more details)
</span>""" % (of, recID)
            else:
                out += """\n<br/><span class="quicknote">
Template %s could not be read.
</span>""" % (template)
        if CFG_PATH_PHP and os.path.isfile(os.path.join(CFG_BINDIR, 'bibformat')):
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
Using old BibFormat for record %s.
</span>""" % recID
            return out + call_old_bibformat(recID, of=of, on_the_fly=True,
                                            verbose=verbose)
        ############################# END ##################################
        # Raise-and-catch so register_exception() records a full traceback.
        try:
            raise InvenioBibFormatError(_('No template could be found for output format %s.') % of)
        except InvenioBibFormatError, exc:
            register_exception(req=bfo.req)
            if verbose > 5:
                return out + str(exc.message)
        return out
    # Format with template
    out_ = format_with_format_template(template, bfo, verbose, qid=qid)
    out += out_
    return out
def decide_format_template(bfo, of):
    """
    Return the format template name to use for the given output format.

    Scans the output format rules in order and returns the template of
    the first rule whose value pattern entirely matches one of the
    record's field values (case-insensitively, with surrounding spaces
    ignored on both sides). Falls back to the output format's default
    template, or None when no default is defined.

    @param bfo: a L{BibFormatObject}
    @param of: the code of the output format to use
    @return: name of a format template, or None
    """
    output_format = get_output_format(of)
    for rule in output_format['rules']:
        field = rule['field']
        if field.startswith('00'):
            # Rule uses a controlfield: a single (stripped) value.
            candidates = [bfo.control_field(field).strip()]
        else:
            # Rule uses a datafield: possibly several occurrences.
            candidates = bfo.fields(field)
        pattern = rule['value'].strip()
        # First candidate fully matched by the rule pattern wins.
        for candidate in candidates:
            candidate = candidate.strip()
            match_obj = re.match(pattern, candidate, re.IGNORECASE)
            if match_obj is not None and match_obj.end() == len(candidate):
                return rule['template']
    default_template = output_format['default']
    return default_template if default_template != '' else None
def format_with_format_template(format_template_filename, bfo,
                                verbose=0, format_template_code=None, qid=""):
    """ Format a record given a
    format template.

    Returns a formatted version of the record represented by bfo,
    in the language specified in bfo, and with the specified format template.

    If format_template_code is provided, the template will not be loaded from
    format_template_filename (but format_template_filename will still be used to
    determine if bft or xsl transformation applies). This allows to preview format
    code without having to save file on disk.

    @param format_template_filename: the filename of a format template
    @param bfo: the object containing parameters for the current formatting
    @param format_template_code: if not empty, use code as template instead of reading format_template_filename (used for previews)
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @return: formatted text
    """
    _ = gettext_set_language(bfo.lang)
    def translate(match):
        """
        Translate matching values
        """
        word = match.group("word")
        translated_word = _(word)
        return translated_word
    # Load the template source unless it targets the Jinja engine (which
    # resolves templates by name itself).
    if format_template_code is not None:
        format_content = str(format_template_code)
    elif not format_template_filename.endswith("." + CFG_BIBFORMAT_FORMAT_JINJA_TEMPLATE_EXTENSION):
        format_content = get_format_template(format_template_filename)['code']
    # Dispatch on the template kind: .bft, Jinja template, or .xsl.
    if format_template_filename is None or \
           format_template_filename.endswith("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
        # .bft
        filtered_format = filter_languages(format_content, bfo.lang)
        localized_format = translation_pattern.sub(translate, filtered_format)
        evaluated_format = eval_format_template_elements(localized_format,
                                                         bfo,
                                                         verbose)
    elif format_template_filename.endswith("." + CFG_BIBFORMAT_FORMAT_JINJA_TEMPLATE_EXTENSION):
        evaluated_format = '<!-- empty -->'
        #try:
        from functools import wraps
        from invenio.bibfield import \
            create_record as bibfield_create_record, \
            get_record as bibfield_get_record
        from invenio.search_engine import print_record
        from flask.ext.login import current_user
        from invenio.base.helpers import unicodifier
        def _format_record(recid, of='hb', user_info=current_user, *args, **kwargs):
            return print_record(recid, format=of, user_info=user_info, *args, **kwargs)
        # Fixes unicode problems in Jinja2 templates.
        def encode_utf8(f):
            @wraps(f)
            def wrapper(*args, **kwds):
                return unicodifier(f(*args, **kwds))
            return wrapper
        if bfo.recID:
            record = bibfield_get_record(bfo.recID)
        else:
            record = bibfield_create_record(bfo.xml_record, master_format='marc')
            bfo.recID = bfo.recID if bfo.recID else 0
        record.__getitem__ = encode_utf8(record.__getitem__)
        record.get = encode_utf8(record.get)
        evaluated_format = render_template_to_string(
            'format/record/'+format_template_filename,
            recid=bfo.recID,
            record=record,
            format_record=_format_record,
            qid=qid,
            bfo=bfo, **TEMPLATE_CONTEXT_FUNCTIONS_CACHE.functions).encode('utf-8')
        #except Exception:
        #    register_exception()
    else:
        #.xsl
        if bfo.xml_record:
            # bfo was initialized with a custom MARCXML
            xml_record = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
                         record_xml_output(bfo.record)
        else:
            # Fetch MARCXML. On-the-fly xm if we are now formatting in xm
            xml_record = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
                         record_get_xml(bfo.recID, 'xm', on_the_fly=False)
        # Transform MARCXML using stylesheet.
        # NOTE: 'format' here is the XSLT transformer imported from
        # invenio.bibformat_xslt_engine, not the builtin.
        evaluated_format = format(xml_record, template_source=format_content)
    # Normalize the result to a UTF-8 encoded str, whatever the branch
    # above produced (str or unicode).
    try:
        evaluated_format = evaluated_format.decode('utf8').encode('utf8')
    except:
        try:
            evaluated_format = evaluated_format.encode('utf8')
        except:
            evaluated_format = '<!-- Error -->'.encode('utf8')
    return evaluated_format
def eval_format_template_elements(format_template, bfo, verbose=0):
    """
    Evaluates the format elements of the given template and replaces
    each element with its value.

    Prepares the format template content so that we can directly replace
    the marc code by their value. This implies:
      1. Look for special tags
      2. Replace special tags by their evaluation

    @param format_template: the format template code
    @param bfo: the object containing parameters for the current formatting
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors, 7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @return: the template with special tags replaced by their evaluation
    """
    _ = gettext_set_language(bfo.lang)
    # First define insert_element_code(match), used in re.sub() function
    def insert_element_code(match):
        """
        Analyses 'match', interprets the corresponding code, and returns
        the result of the evaluation.

        Called by substitution in 'eval_format_template_elements(...)'

        @param match: a match object corresponding to the special tag that must be interpreted
        """
        function_name = match.group("function_name")
        try:
            format_element = get_format_element(function_name, verbose)
        except Exception as e:
            format_element = None
            if verbose >= 5:
                # Fixed: also close the <b> tag (it was left unbalanced,
                # unlike the matching error markup below).
                return '<b><span style="color: rgb(255, 0, 0);">' + \
                       cgi.escape(str(e)).replace('\n', '<br/>') + \
                       '</span></b>'
        if format_element is None:
            # Raise-and-catch so register_exception() records a traceback.
            try:
                raise InvenioBibFormatError(_('Could not find format element named %s.') % function_name)
            except InvenioBibFormatError as exc:
                register_exception(req=bfo.req)
                if verbose >= 5:
                    return '<b><span style="color: rgb(255, 0, 0);">' + \
                           str(exc.message) + '</span></b>'
        else:
            params = {}
            # Look for function parameters given in format template code
            all_params = match.group('params')
            if all_params is not None:
                function_params_iterator = pattern_function_params.finditer(all_params)
                for param_match in function_params_iterator:
                    name = param_match.group('param')
                    value = param_match.group('value')
                    params[name] = value
            # Evaluate element with params and return (do not return errors)
            (result, dummy) = eval_format_element(format_element,
                                                  bfo,
                                                  params,
                                                  verbose)
            return result
    # Substitute special tags in the format by our own text.
    # Special tags have the form <BFE_format_element_name [param="value"]* />
    return pattern_tag.sub(insert_element_code, format_template)
def eval_format_element(format_element, bfo, parameters=None, verbose=0):
    """
    Returns the result of the evaluation of the given format element
    name, with given L{BibFormatObject} and parameters. Also returns
    the errors of the evaluation.

    @param format_element: a format element structure as returned by get_format_element
    @param bfo: a L{BibFormatObject} used for formatting
    @param parameters: a dict of parameters to be used for formatting. Key is parameter and value is value of parameter
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @return: tuple (result, errors)
    """
    if parameters is None:
        parameters = {}
    errors = []
    #Load special values given as parameters
    prefix = parameters.get('prefix', "")
    suffix = parameters.get('suffix', "")
    default_value = parameters.get('default', "")
    escape = parameters.get('escape', "")
    output_text = ''
    _ = gettext_set_language(bfo.lang)
    # 3 possible cases:
    # a) format element file is found: we execute it
    # b) format element file is not found, but exist in tag table (e.g. bfe_isbn)
    # c) format element is totally unknown. Do nothing or report error
    if format_element is not None and format_element['type'] == "python":
        # a) We found an element with the tag name, of type "python"
        # Prepare a dict 'params' to pass as parameter to 'format'
        # function of element
        params = {}
        # Look for parameters defined in format element
        # Fill them with specified default values and values
        # given as parameters.
        # Also remember if the element overrides the 'escape'
        # parameter
        format_element_overrides_escape = False
        for param in format_element['attrs']['params']:
            name = param['name']
            default = param['default']
            params[name] = parameters.get(name, default)
            if name == 'escape':
                format_element_overrides_escape = True
        # Add BibFormatObject
        params['bfo'] = bfo
        # Execute function with given parameters and return result.
        function = format_element['code']
        _ = gettext_set_language(bfo.lang)
        try:
            # Python 2 'apply': call function(**params).
            output_text = apply(function, (), params)
        except Exception, e:
            name = format_element['attrs']['name']
            # Raise-and-catch so register_exception() records a traceback.
            try:
                raise InvenioBibFormatError(_('Error when evaluating format element %s with parameters %s.') % (name, str(params)))
            except InvenioBibFormatError, exc:
                register_exception(req=bfo.req)
                errors.append(exc.message)
                if verbose >= 5:
                    tb = sys.exc_info()[2]
                    stack = traceback.format_exception(Exception, e, tb, limit=None)
                    output_text = '<b><span style="color: rgb(255, 0, 0);">'+ \
                                  str(exc.message) + "".join(stack) +'</span></b> '
        # None can be returned when evaluating function
        if output_text is None:
            output_text = ""
        else:
            output_text = str(output_text)
        # Escaping:
        # (1) By default, everything is escaped in mode 1
        # (2) If evaluated element has 'escape_values()' function, use
        #     its returned value as escape mode, and override (1)
        # (3) If template has a defined parameter 'escape' (in allowed
        #     values), use it, and override (1) and (2). If this
        #     'escape' parameter is overriden by the format element
        #     (defined in the 'format' function of the element), leave
        #     the escaping job to this element
        # (1)
        escape_mode = 1
        # (2)
        escape_function = format_element['escape_function']
        if escape_function is not None:
            try:
                escape_mode = apply(escape_function, (), {'bfo': bfo})
            except Exception, e:
                # NOTE(review): 'name' may be unbound here when the element
                # declares no params and format() did not raise — confirm.
                try:
                    raise InvenioBibFormatError(_('Escape mode for format element %s could not be retrieved. Using default mode instead.') % name)
                except InvenioBibFormatError, exc:
                    register_exception(req=bfo.req)
                    errors.append(exc.message)
                    if verbose >= 5:
                        tb = sys.exc_info()[2]
                        output_text += '<b><span style="color: rgb(255, 0, 0);">'+ \
                                       str(exc.message) +'</span></b> '
        # (3)
        if escape in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']:
            escape_mode = int(escape)
        # If escape is equal to 1, then escape all
        # HTML reserved chars.
        if escape_mode > 0 and not format_element_overrides_escape:
            output_text = escape_field(output_text, mode=escape_mode)
        # Add prefix and suffix if they have been given as parameters and if
        # the evaluation of element is not empty
        if output_text.strip() != "":
            output_text = prefix + output_text + suffix
        # Add the default value if output_text is empty
        if output_text == "":
            output_text = default_value
        return (output_text, errors)
    elif format_element is not None and format_element['type'] == "field":
        # b) We have not found an element in files that has the tag
        # name. Then look for it in the table "tag"
        #
        # <BFE_LABEL_IN_TAG prefix = "" suffix = "" separator = ""
        #                   nbMax="" escape="0"/>
        #
        # Load special values given as parameters
        # NOTE(review): the lookup key contains a trailing space
        # ('separator '), so a "separator" attribute parsed from the
        # template (whose names cannot contain spaces) will never match
        # and the separator stays "" — confirm whether this is intended
        # before changing it.
        separator = parameters.get('separator ', "")
        nbMax = parameters.get('nbMax', "")
        escape = parameters.get('escape', "1") # By default, escape here
        # Get the fields tags that have to be printed
        tags = format_element['attrs']['tags']
        output_text = []
        # Get values corresponding to tags
        for tag in tags:
            p_tag = parse_tag(tag)
            values = record_get_field_values(bfo.get_record(),
                                             p_tag[0],
                                             p_tag[1],
                                             p_tag[2],
                                             p_tag[3])
            if len(values)>0 and isinstance(values[0], dict):
                #flatten dict to its values only
                values_list = map(lambda x: x.values(), values)
                #output_text.extend(values)
                for values in values_list:
                    output_text.extend(values)
            else:
                output_text.extend(values)
        if nbMax != "":
            # Keep at most nbMax values when the attribute parses as int.
            try:
                nbMax = int(nbMax)
                output_text = output_text[:nbMax]
            except:
                name = format_element['attrs']['name']
                try:
                    raise InvenioBibFormatError(_('"nbMax" parameter for %s must be an "int".') % name)
                except InvenioBibFormatError, exc:
                    register_exception(req=bfo.req)
                    errors.append(exc.message)
                    if verbose >= 5:
                        # NOTE(review): list.append() returns None, so this
                        # sets output_text to None and the join below would
                        # fail — likely meant output_text + [exc.message];
                        # confirm before changing.
                        output_text = output_text.append(exc.message)
        # Add prefix and suffix if they have been given as parameters and if
        # the evaluation of element is not empty.
        # If evaluation is empty string, return default value if it exists.
        # Else return empty string
        if ("".join(output_text)).strip() != "":
            # If escape is equal to 1, then escape all
            # HTML reserved chars.
            if escape == '1':
                output_text = cgi.escape(separator.join(output_text))
            else:
                output_text = separator.join(output_text)
            output_text = prefix + output_text + suffix
        else:
            #Return default value
            output_text = default_value
        return (output_text, errors)
    else:
        # c) Element is unknown
        try:
            raise InvenioBibFormatError(_('Could not find format element named %s.') % format_element)
        except InvenioBibFormatError, exc:
            register_exception(req=bfo.req)
            errors.append(exc.message)
            if verbose < 5:
                return ("", errors)
            elif verbose >= 5:
                if verbose >= 9:
                    sys.exit(exc.message)
                return ('<b><span style="color: rgb(255, 0, 0);">' + \
                        str(exc.message)+'</span></b>', errors)
def filter_languages(format_template, ln='en'):
    """
    Filters the language tags that do not correspond to the specified language.

    Every <lang>...</lang> section of the template is reduced to the content of
    its inner tag for ``ln`` (e.g. <en>, <fr>); other localized tags are dropped.
    If a section has no tag for ``ln``, the content for ``CFG_SITE_LANG`` is
    used instead for that section only.

    @param format_template: the format template code
    @param ln: the language that is NOT filtered out from the template
    @return: the format template with unnecessary languages filtered out
    """
    # First define search_lang_tag(match) and clean_language_tag(match), used
    # in re.sub() function
    def search_lang_tag(match):
        """
        Searches for the <lang>...</lang> tag and remove inner localized tags
        such as <en>, <fr>, that are not current_lang.
        If current_lang cannot be found inside <lang> ... </lang>, try to use 'CFG_SITE_LANG'
        @param match: a match object corresponding to the special tag that must be interpreted
        """
        # Local to this call: may be downgraded to CFG_SITE_LANG below when
        # the requested language is absent from this <lang> section.
        current_lang = ln
        def clean_language_tag(match):
            """
            Return tag text content if tag language of match is output language.
            Called by substitution in 'filter_languages(...)'
            @param match: a match object corresponding to the special tag that must be interpreted
            """
            if match.group(1) == current_lang:
                return match.group(2)
            else:
                return ""
        # End of clean_language_tag

        lang_tag_content = match.group("langs")
        # Try to find tag with current lang. If it does not exists,
        # then current_lang becomes CFG_SITE_LANG until the end of this
        # replace
        pattern_current_lang = re.compile(r"<("+current_lang+ \
                r")\s*>(.*?)(</"+current_lang+r"\s*>)", re.IGNORECASE | re.DOTALL)
        if re.search(pattern_current_lang, lang_tag_content) is None:
            # Requested language not present: fall back to the site default
            # language for this section only.
            current_lang = CFG_SITE_LANG

        cleaned_lang_tag = ln_pattern.sub(clean_language_tag, lang_tag_content)
        return cleaned_lang_tag
    # End of search_lang_tag

    # 'pattern_lang' is a module-level regex capturing each <lang>...</lang>
    # span in the named group "langs".
    filtered_format_template = pattern_lang.sub(search_lang_tag, format_template)
    return filtered_format_template
def get_format_template(filename, with_attributes=False):
    """
    Returns the structured content of the given format template.

    If 'with_attributes' is true, returns the name and description. Else 'attrs'
    is not returned as key in dictionary (it might, if it has already been
    loaded previously)::

        {'code':"<b>Some template code</b>"
         'attrs': {'name': "a name", 'description': "a description"}
        }

    On a read error the exception is registered and a template with empty
    'code' is returned (and cached).

    @param filename: the filename of a format template
    @param with_attributes: if True, fetch the attributes (names and description)
    @return: structured content of format template, or None if 'filename' has
             neither the template extension nor '.xsl'
    """
    _ = gettext_set_language(CFG_SITE_LANG)
    # Get from cache whenever possible
    global format_templates_cache

    if not filename.endswith("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) and \
           not filename.endswith(".xsl"):
        return None

    if filename in format_templates_cache:
        # Return the cached template unless attributes were requested but are
        # missing from the cached entry, in which case reload with attributes.
        # (The original only returned the cache when attributes were present
        # AND requested, so plain lookups always re-read the file.)
        if not with_attributes or \
               'attrs' in format_templates_cache[filename]:
            return format_templates_cache[filename]

    format_template = {'code': ""}
    try:
        path = "%s%s%s" % (cfg['CFG_BIBFORMAT_TEMPLATES_PATH'], os.sep, filename)

        # 'with' guarantees the file handle is closed even if read() raises
        # (the original leaked it in that case).
        with open(path) as format_file:
            format_content = format_file.read()

        # Load format template code
        # Remove name and description
        if filename.endswith("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION):
            code_and_description = pattern_format_template_name.sub("",
                                                                    format_content, 1)
            code = pattern_format_template_desc.sub("", code_and_description, 1)
        else:
            code = format_content
        format_template['code'] = code
    except Exception as e:
        try:
            raise InvenioBibFormatError(_('Could not read format template named %s. %s.') % (filename, str(e)))
        except InvenioBibFormatError:
            register_exception()

    # Save attributes if necessary
    if with_attributes:
        format_template['attrs'] = get_format_template_attrs(filename)

    # Cache and return
    format_templates_cache[filename] = format_template
    return format_template
def get_format_templates(with_attributes=False):
    """
    Returns the list of all format templates, as dictionary with filenames as keys.

    If 'with_attributes' is true, returns the name and description. Else 'attrs'
    is not returned as key in each dictionary (it might, if it has already been
    loaded previously)::

        [{'code':"<b>Some template code</b>"
          'attrs': {'name': "a name", 'description': "a description"}
         },
         ...
        }

    @param with_attributes: if True, fetch the attributes (names and description) for formats
    @return: the list of format templates (with code and info)
    """
    wanted_suffixes = ("." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, ".xsl")
    return dict((name, get_format_template(name, with_attributes))
                for name in os.listdir(cfg['CFG_BIBFORMAT_TEMPLATES_PATH'])
                if name.endswith(wanted_suffixes))
def get_format_template_attrs(filename):
    """
    Returns the attributes of the format template with given filename.

    The attributes are {'name', 'description'}.
    Caution: the function does not check that path exists or
    that the format element is valid.

    On any read error, the exception is registered and the name falls back to
    the filename itself.

    @param filename: the name of a format template
    @return: a structure with detailed information about given format template
    """
    _ = gettext_set_language(CFG_SITE_LANG)
    attrs = {}
    attrs['name'] = ""
    attrs['description'] = ""
    try:
        # 'with' ensures the file handle is released even when read() raises
        # (the original leaked it in that case).
        with open("%s%s%s" % (cfg['CFG_BIBFORMAT_TEMPLATES_PATH'],
                              os.sep,
                              filename)) as template_file:
            code = template_file.read()

        match = None
        if filename.endswith(".xsl"):
            # .xsl: no embedded metadata, derive the name from the filename
            attrs['name'] = filename[:-4]
        else:
            # .bft: name and description are embedded in the template source
            match = pattern_format_template_name.search(code)
            if match is not None:
                attrs['name'] = match.group('name')
            else:
                attrs['name'] = filename

            match = pattern_format_template_desc.search(code)
            if match is not None:
                attrs['description'] = match.group('desc').rstrip('.')
    except Exception as e:
        try:
            raise InvenioBibFormatError(_('Could not read format template named %s. %s.') % (filename, str(e)))
        except InvenioBibFormatError:
            register_exception()
        attrs['name'] = filename
    return attrs
def get_format_element(element_name, verbose=0, with_built_in_params=False):
    """
    Returns the format element structured content.

    Return None if element cannot be loaded (file not found, not readable or
    invalid).

    The returned structure is::
        {'attrs': {some attributes in dict. See get_format_element_attrs_from_*}
         'code': the_function_code,
         'type':"field" or "python" depending if element is defined in file or table,
         'escape_function': the function to call to know if element output must be escaped}

    @param element_name: the name of the format element to load
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @param with_built_in_params: if True, load the parameters built in all elements
    @return: a dictionary with format element attributes, or None on failure
    """
    _ = gettext_set_language(CFG_SITE_LANG)
    # Get from cache whenever possible
    global format_elements_cache

    errors = []
    # Resolve filename and prepare 'name' as key for the cache
    filename = resolve_format_element_filename(element_name)
    if filename is not None:
        name = filename.upper()
    else:
        name = element_name.upper()

    if format_elements_cache.has_key(name):
        element = format_elements_cache[name]
        # Cache hit is only usable if built-in params were not requested, or
        # were already loaded into the cached entry; otherwise reload below.
        if not with_built_in_params or \
               (with_built_in_params and \
                element['attrs'].has_key('builtin_params')):
            return element

    if filename is None:
        # No element file on disk: element is maybe in the 'tag' table
        if bibformat_dblayer.tag_exists_for_name(element_name):
            format_element = {'attrs': get_format_element_attrs_from_table( \
                                           element_name,
                                           with_built_in_params),
                              'code':None,
                              'escape_function':None,
                              'type':"field"}
            # Cache and returns
            format_elements_cache[name] = format_element
            return format_element
        else:
            # Neither a file nor a tag mapping: log and give up
            try:
                raise InvenioBibFormatError(_('Format element %s could not be found.') % element_name)
            except InvenioBibFormatError, exc:
                register_exception()
            if verbose >= 5:
                sys.stderr.write(exc.message)
            return None
    else:
        format_element = {}

        module_name = filename
        if module_name.endswith(".py"):
            module_name = module_name[:-3]

        # Load function 'format_element()' inside element
        try:
            packages = cfg['CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH']
            packages = [packages] if packages is not None else None
            function_format = TEMPLATE_CONTEXT_FUNCTIONS_CACHE.\
                bibformat_elements(packages)[module_name]
            format_element['code'] = function_format
        except KeyError:
            # Module found but exposes no usable formatting function
            try:
                raise InvenioBibFormatError(_('Format element %s has no function named "format".') % element_name)
            except InvenioBibFormatError, exc:
                register_exception()
                errors.append(exc.message)
            if verbose >= 5:
                sys.stderr.write(exc.message)

        if errors:
            # In debug-ish mode (>=7) escalate instead of returning None
            if verbose >= 7:
                raise Exception, exc.message
            return None

        # Load function 'escape_values()' inside element
        format_element['escape_function'] = function_format._escape_values

        # Prepare, cache and return
        format_element['attrs'] = get_format_element_attrs_from_function( \
            function_format,
            element_name,
            with_built_in_params)
        format_element['type'] = "python"
        format_elements_cache[name] = format_element
        return format_element
def get_format_elements(with_built_in_params=False):
    """
    Returns the list of format elements attributes as dictionary structure.

    Elements declared in files have priority over element declared in 'tag' table.
    The returned object has this format::
        {element_name1: {'attrs': {'description':..., 'seealso':...
                                   'params':[{'name':..., 'default':..., 'description':...}, ...]
                                   'builtin_params':[{'name':..., 'default':..., 'description':...}, ...]
                                  },
                         'code': code_of_the_element
                        },
         element_name2: {...},
         ...}

    Returns only elements that could be loaded (not error in code).

    @return: a dict of format elements with name as key, and a dict as attributes
    @param with_built_in_params: if True, load the parameters built in all elements
    """
    elements = {}

    # Elements declared in the 'tag' table first...
    for name in bibformat_dblayer.get_all_name_tag_mappings():
        key = name.upper().replace(" ", "_").strip()
        elements[key] = get_format_element(name,
                                           with_built_in_params=with_built_in_params)

    # ...then elements declared as files, which override table entries.
    for filename in os.listdir(cfg['CFG_BIBFORMAT_ELEMENTS_PATH']):
        normalized = filename.upper().replace(" ", "_")
        if not normalized.endswith(".PY") or filename.upper() == "__INIT__.PY":
            continue
        if normalized.startswith("BFE_"):
            normalized = normalized[4:]
        element_name = normalized[:-3]
        element = get_format_element(element_name,
                                     with_built_in_params=with_built_in_params)
        if element is not None:
            elements[element_name] = element

    return elements
def get_format_element_attrs_from_function(function, element_name,
                                           with_built_in_params=False):
    """
    Returns the attributes of the function given as parameter.

    It looks for standard parameters of the function, default
    values and comments in the docstring.

    The attributes are::
        {'name' : "name of element" #basically the name of 'name' parameter
         'description': "a string description of the element",
         'seealso' : ["element_1.py", "element_2.py", ...] #a list of related elements
         'params': [{'name':"param_name", #a list of parameters for this element (except 'bfo')
                     'default':"default value",
                     'description': "a description"}, ...],
         'builtin_params': {name: {'name':"param_name",#the parameters builtin for all elem of this kind
                                   'default':"default value",
                                   'description': "a description"}, ...},
        }

    @param function: the formatting function of a format element
    @param element_name: the name of the element
    @param with_built_in_params: if True, load the parameters built in all elements
    @return: a structure with detailed information of a function
    """
    attrs = {}
    attrs['description'] = ""
    attrs['name'] = element_name.replace(" ", "_").upper()
    attrs['seealso'] = []

    docstring = function.__doc__
    if isinstance(docstring, str):
        # Look for function description in docstring: everything before the
        # first @param / @see: marker.
        description = docstring.split("@param")[0]
        description = description.split("@see:")[0]
        attrs['description'] = description.strip().rstrip('.')

        # Look for @see: in docstring
        match = pattern_format_element_seealso.search(docstring)
        if match is not None:
            elements = match.group('see').rstrip('.').split(",")
            for element in elements:
                attrs['seealso'].append(element.strip())

    params = {}
    # Look for parameters in function definition.
    # inspect.getargspec() was removed in Python 3.11; getfullargspec() has
    # the same layout at indices 0 (args) and 3 (defaults).
    try:
        argspec = inspect.getfullargspec(function)
    except AttributeError:
        argspec = inspect.getargspec(function)
    args = list(argspec[0])
    defaults = argspec[3]

    # Prepare args and defaults_list such that we can have a mapping
    # from args to defaults: defaults apply to the trailing args, so
    # reversing both aligns them from the end.
    args.reverse()
    if defaults is not None:
        defaults_list = list(defaults)
        defaults_list.reverse()
    else:
        defaults_list = []

    for i, arg in enumerate(args):
        # Pad missing defaults with None (replaces the Python-2-only
        # map(None, args, defaults_list) zip-longest idiom).
        default = defaults_list[i] if i < len(defaults_list) else None
        if arg == "bfo":
            # Don't keep this as parameter. It is hidden to users, and
            # exists in all elements of this kind
            continue
        param = {}
        param['name'] = arg
        if default is None:
            # In case no check is made inside element, we prefer to
            # print "" (nothing) than None in output
            param['default'] = ""
        else:
            param['default'] = default
        param['description'] = "(no description provided)"
        params[arg] = param

    if isinstance(docstring, str):
        # Look for @param descriptions in docstring.
        # Add description to existing parameters in params dict
        for match in pattern_format_element_params.finditer(docstring):
            name = match.group('name')
            if name in params:
                params[name]['description'] = match.group('desc').rstrip('.')

    # list() keeps the return type a list under Python 3's dict views too
    attrs['params'] = list(params.values())

    # Load built-in parameters if necessary
    if with_built_in_params:
        builtin_params = []
        # Add 'prefix' parameter
        param_prefix = {}
        param_prefix['name'] = "prefix"
        param_prefix['default'] = ""
        param_prefix['description'] = """A prefix printed only if the
                                         record has a value for this element"""
        builtin_params.append(param_prefix)

        # Add 'suffix' parameter
        param_suffix = {}
        param_suffix['name'] = "suffix"
        param_suffix['default'] = ""
        param_suffix['description'] = """A suffix printed only if the
                                         record has a value for this element"""
        builtin_params.append(param_suffix)

        # Add 'default' parameter
        param_default = {}
        param_default['name'] = "default"
        param_default['default'] = ""
        param_default['description'] = """A default value printed if the
                                          record has no value for this element"""
        builtin_params.append(param_default)

        # Add 'escape' parameter
        param_escape = {}
        param_escape['name'] = "escape"
        param_escape['default'] = ""
        param_escape['description'] = """0 keeps value as it is. Refer to main
                                         documentation for escaping modes
                                         1 to 7"""
        builtin_params.append(param_escape)

        attrs['builtin_params'] = builtin_params

    return attrs
def get_format_element_attrs_from_table(element_name,
                                        with_built_in_params=False):
    """
    Returns the attributes of the format element with given name in 'tag' table.

    Returns None if element_name does not exist in tag table.

    The attributes are::
        {'name' : "name of element" #basically the name of 'element_name' parameter
         'description': "a string description of the element",
         'seealso' : [] #a list of related elements. Always empty in this case
         'params': [], #a list of parameters for this element. Always empty in this case
         'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind
                             'default':"default value",
                             'description': "a description"}, ...],
         'tags':["950.1", 203.a] #the list of tags printed by this element
        }

    @param element_name: an element name in database
    @param with_built_in_params: if True, load the parameters built in all elements
    @return: a structure with detailed information of an element found in DB
    """
    tags = bibformat_dblayer.get_tags_from_name(element_name)
    field_label = "fields" if len(tags) > 1 else "field"

    attrs = {}
    attrs['description'] = "Prints %s %s of the record" % (field_label,
                                                           ", ".join(tags))
    attrs['name'] = element_name.replace(" ", "_").upper()
    attrs['seealso'] = []
    attrs['params'] = []
    attrs['tags'] = tags

    # Load built-in parameters if necessary
    if with_built_in_params:
        builtin_params = []

        prefix_param = {}
        prefix_param['name'] = "prefix"
        prefix_param['default'] = ""
        prefix_param['description'] = """A prefix printed only if the
                                         record has a value for this element"""
        builtin_params.append(prefix_param)

        suffix_param = {}
        suffix_param['name'] = "suffix"
        suffix_param['default'] = ""
        suffix_param['description'] = """A suffix printed only if the
                                         record has a value for this element"""
        builtin_params.append(suffix_param)

        separator_param = {}
        separator_param['name'] = "separator"
        separator_param['default'] = " "
        separator_param['description'] = """A separator between elements of
                                            the field"""
        builtin_params.append(separator_param)

        nbmax_param = {}
        nbmax_param['name'] = "nbMax"
        nbmax_param['default'] = ""
        nbmax_param['description'] = """The maximum number of values to
                                        print for this element. No limit if not
                                        specified"""
        builtin_params.append(nbmax_param)

        default_param = {}
        default_param['name'] = "default"
        default_param['default'] = ""
        default_param['description'] = """A default value printed if the
                                          record has no value for this element"""
        builtin_params.append(default_param)

        escape_param = {}
        escape_param['name'] = "escape"
        escape_param['default'] = ""
        escape_param['description'] = """If set to 1, replaces special
                                         characters '&', '<' and '>' of this
                                         element by SGML entities"""
        builtin_params.append(escape_param)

        attrs['builtin_params'] = builtin_params

    return attrs
def get_output_format(code, with_attributes=False, verbose=0):
    """
    Returns the structured content of the given output format.

    If 'with_attributes' is true, also returns the names and description of the
    output formats, else 'attrs' is not returned in dict (it might, if it has
    already been loaded previously).

    If output format corresponding to 'code' is not found return an empty structure.

    See get_output_format_attrs() to learn more about the attributes::
        {'rules': [ {'field': "980__a",
                     'value': "PREPRINT",
                     'template': "filename_a.bft",
                    },
                    {...}
                  ],
         'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
                   'description': "a description"
                   'code': "fnm1",
                   'content_type': "application/ms-excel",
                   'visibility': 1
                  }
         'default':"filename_b.bft"
        }

    @param code: the code of an output_format
    @param with_attributes: if True, fetch the attributes (names and description) for format
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @return: structured content of output format
    """
    _ = gettext_set_language(CFG_SITE_LANG)
    output_format = {'rules': [], 'default': ""}
    filename = resolve_output_format_filename(code, verbose)

    if filename is None:
        try:
            raise InvenioBibFormatError(_('Output format with code %s could not be found.') % code)
        except InvenioBibFormatError:
            register_exception()
        if with_attributes: # Create empty attrs if asked for attributes
            output_format['attrs'] = get_output_format_attrs(code, verbose)
        return output_format

    # Get from cache whenever possible
    global format_outputs_cache
    if filename in format_outputs_cache:
        # If we must return with attributes but cache has not
        # attributes, then load attributes
        if with_attributes and \
               'attrs' not in format_outputs_cache[filename]:
            format_outputs_cache[filename]['attrs'] = get_output_format_attrs(code, verbose)
        return format_outputs_cache[filename]

    try:
        if with_attributes:
            output_format['attrs'] = get_output_format_attrs(code, verbose)

        path = "%s%s%s" % (CFG_BIBFORMAT_OUTPUTS_PATH, os.sep, filename)
        current_tag = ''
        # 'with' closes the file even if parsing raises (the original leaked
        # the handle on error).
        with open(path) as format_file:
            for line in format_file:
                line = line.strip()
                if line == "":
                    # Ignore blank lines
                    continue
                if line.endswith(":"):
                    # Retrieve tag
                    # Remove : spaces and eol at the end of line
                    clean_line = line.rstrip(": \n\r")
                    # The tag starts at second position
                    current_tag = "".join(clean_line.split()[1:]).strip()
                elif line.find('---') != -1:
                    # A rule: "<condition> --- <template>"
                    words = line.split('---')
                    template = words[-1].strip()
                    condition = ''.join(words[:-1])
                    output_format['rules'].append({'field': current_tag,
                                                   'value': condition,
                                                   'template': template,
                                                   })
                elif line.find(':') != -1:
                    # Default case
                    default = line.split(':')[1].strip()
                    output_format['default'] = default
    except Exception as e:
        try:
            raise InvenioBibFormatError(_('Output format %s cannot not be read. %s.') % (filename, str(e)))
        except InvenioBibFormatError:
            register_exception()

    # Cache and return
    format_outputs_cache[filename] = output_format
    return output_format
def get_output_format_attrs(code, verbose=0):
    """
    Returns the attributes of an output format.

    The attributes contain 'code', which is the short identifier of the output format
    (to be given as parameter in format_record function to specify the output format),
    'description', a description of the output format, 'visibility' the visibility of
    the format in the output format list on public pages and 'names', the localized names
    of the output format. If 'content_type' is specified then the search_engine will
    send a file with this content type and with result of formatting as content to the user.

    The 'names' dict always contains 'generic', 'ln' (for long name) and 'sn' (for short names)
    keys. 'generic' is the default name for output format. 'ln' and 'sn' contain long and short
    localized names of the output format. Only the languages for which a localization exist
    are used::

        {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
         'description': "a description"
         'code': "fnm1",
         'content_type': "application/ms-excel",
         'visibility': 1
        }

    @param code: the short identifier of the format
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @return: structured content of output format attributes
    """
    extension = "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    if code.endswith(extension):
        code = code[:-len(extension)]

    attrs = {'names': {'generic': "",
                       'ln': {},
                       'sn': {}},
             'description': '',
             'code': code.upper(),
             'content_type': "",
             'visibility': 1}

    # Unknown codes get the default (empty) attribute structure
    if resolve_output_format_filename(code, verbose) is None:
        return attrs

    attrs['names'] = bibformat_dblayer.get_output_format_names(code)
    attrs['description'] = bibformat_dblayer.get_output_format_description(code)
    attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code)
    attrs['visibility'] = bibformat_dblayer.get_output_format_visibility(code)
    return attrs
def get_output_formats(with_attributes=False):
    """
    Returns the list of all output format, as a dictionary with their filename as key.

    If 'with_attributes' is true, also returns the names and description of the
    output formats, else 'attrs' is not returned in dicts (it might, if it has
    already been loaded previously).

    See get_output_format_attrs() to learn more on the attributes::

        {'filename_1.bfo': {'rules': [ {'field': "980__a",
                                        'value': "PREPRINT",
                                        'template': "filename_a.bft",
                                       },
                                       {...}
                                     ],
                            'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
                                      'description': "a description"
                                      'code': "fnm1"
                                     }
                            'default':"filename_b.bft"
                           },
         'filename_2.bfo': {...},
         ...
        }

    @param with_attributes: if returned output formats contain detailed info, or not
    @type with_attributes: boolean
    @return: the list of output formats
    """
    suffix = "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    output_formats = {}
    for filename in os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH):
        if not filename.endswith(suffix):
            continue
        # The code is the filename minus its extension
        code = "".join(filename.split(".")[:-1])
        output_formats[filename] = get_output_format(code, with_attributes)
    return output_formats
def resolve_format_element_filename(element_name):
    """
    Returns the filename of element corresponding to x{element_name}.

    This is necessary since format templates code call
    elements by ignoring case, for eg. <BFE_AUTHOR> is the
    same as <BFE_author>.

    It is also recommended that format elements filenames are
    prefixed with bfe_ . We need to look for these too.

    The name of the element has to start with "BFE_".

    @param element_name: a name for a format element
    @return: the corresponding filename, with right case
    """
    target = element_name.replace(" ", "_").upper()
    if not element_name.endswith(".py"):
        target += ".PY"

    for candidate in os.listdir(cfg['CFG_BIBFORMAT_ELEMENTS_PATH']):
        normalized = candidate.replace(" ", "_").upper()
        # Match with or without the recommended "BFE_" prefix on either side
        if normalized == target or \
               normalized == "BFE_" + target or \
               "BFE_" + normalized == target:
            return candidate

    # No element with that name found
    # Do not log error, as it might be a normal execution case:
    # element can be in database
    return None
def resolve_output_format_filename(code, verbose=0):
    """
    Returns the filename of output corresponding to code.

    This is necessary since output formats names are not case sensitive
    but most file systems are.

    @param code: the code for an output format
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                        5: errors,
                                                        7: errors and warnings,
                                                        9: errors and warnings, stop if error (debug mode ))
    @return: the corresponding filename, with right case, or None if not found
    """
    _ = gettext_set_language(CFG_SITE_LANG)
    # Remove non alphanumeric chars (except . and _)
    code = re.sub(r"[^.0-9a-zA-Z_]", "", code)
    if not code.endswith("." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION):
        code = re.sub(r"\W", "", code) + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION

    wanted = code.upper()
    for filename in os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH):
        if filename.upper() == wanted:
            return filename

    # No output format with that name found
    try:
        raise InvenioBibFormatError(_('Could not find output format named %s.') % code)
    except InvenioBibFormatError as exc:
        register_exception()
        if verbose >= 5:
            sys.stderr.write(exc.message)
            if verbose >= 9:
                # Debug mode: abort the process outright
                sys.exit(exc.message)
    return None
def get_fresh_format_template_filename(name):
    """
    Returns a new filename and name for template with given name.

    Used when writing a new template to a file, so that the name
    has no space, is unique in template directory.

    Returns (unique_filename, modified_name)

    @param name: name for a format template
    @return: the corresponding filename, and modified name if necessary
    """
    name = name.replace(" ", "_")

    def _template_path(stem):
        # Full path a candidate stem would be written to
        return cfg['CFG_BIBFORMAT_TEMPLATES_PATH'] + os.sep + stem \
               + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION

    # Remove non alphanumeric chars (except .)
    filename = re.sub(r"[^.0-9a-zA-Z]", "", name)
    index = 1
    while os.path.exists(_template_path(filename)):
        index += 1
        # NOTE(review): fallback candidates reuse the unsanitized 'name'
        # (underscores kept) — preserved from the original behaviour.
        filename = name + str(index)

    if index > 1:
        returned_name = (name + str(index)).replace("_", " ")
    else:
        returned_name = name.replace("_", " ")

    return (filename + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION,
            returned_name)
def get_fresh_output_format_filename(code):
    """
    Returns a new filename for output format with given code.

    Used when writing a new output format to a file, so that the code
    has no space, is unique in output format directory. The filename
    also need to be at most 6 chars long, as the convention is that
    filename == output format code (+ .extension)
    We return an uppercase code
    Returns (unique_filename, modified_code)

    @param code: the code of an output format
    @return: the corresponding filename, and modified code if necessary
    """
    _ = gettext_set_language(CFG_SITE_LANG)
    #code = re.sub(r"\W", "", code) #Remove non alphanumeric chars
    code = code.upper().replace(" ", "_")
    # Remove non alphanumeric chars (except . and _)
    code = re.sub(r"[^.0-9a-zA-Z_]", "", code)
    if len(code) > 6:
        code = code[:6]

    filename = code
    path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \
           + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    index = 2
    while os.path.exists(path):
        # Append a counter; when that exceeds 6 chars, truncate the stem
        # just enough to make room for the counter's digits
        filename = code + str(index)
        if len(filename) > 6:
            filename = code[:-(len(str(index)))]+str(index)
        index += 1
        path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \
               + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
        # We should not try more than 99999... Well I don't see how we
        # could get there.. Sanity check.
        if index >= 99999:
            try:
                raise InvenioBibFormatError(_('Could not find a fresh name for output format %s.') % code)
            except InvenioBibFormatError, exc:
                register_exception()
            sys.exit("Output format cannot be named as %s"%code)

    return (filename + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, filename)
def clear_caches():
    """
    Clear the caches (Output Format, Format Templates and Format Elements).

    @return: None
    """
    global format_templates_cache, format_elements_cache, format_outputs_cache
    format_templates_cache, format_elements_cache, format_outputs_cache = {}, {}, {}
class BibFormatObject:
    """
    An object that encapsulates a record and associated methods, and that is given
    as parameter to all format elements 'format' function.
    The object is made specifically for a given formatting, i.e. it includes
    for example the language for the formatting.

    The object provides basic accessors to the record. For full access, one can get
    the record with get_record() and then use BibRecord methods on the returned object.
    """
    # The record
    record = None

    # The language in which the formatting has to be done
    lang = CFG_SITE_LANG

    # A list of string describing the context in which the record has
    # to be formatted.
    # It represents the words of the user request in web interface search
    # NOTE(review): class-level mutable default; shared across instances
    # until __init__ rebinds it on the instance
    search_pattern = []

    # The id of the record
    recID = 0

    # The information about the user, as returned by
    # 'webuser.collect_user_info(req)'
    user_info = None

    # The format in which the record is being formatted
    output_format = ''

    req = None # DEPRECATED: use bfo.user_info instead. Used by WebJournal.
    def __init__(self, recID, ln=CFG_SITE_LANG, search_pattern=None,
                 xml_record=None, user_info=None, output_format=''):
        """
        Creates a new bibformat object, with given record.

        You can either specify an record ID to format, or give its xml representation.
        if 'xml_record' is not None, use 'xml_record' instead of recID for the record.

        'user_info' allows to grant access to some functionalities on
        a page depending on the user's priviledges. It is a dictionary
        in the following form::

            user_info = {
                'remote_ip' : '',
                'remote_host' : '',
                'referer' : '',
                'uri' : '',
                'agent' : '',
                'uid' : -1,
                'nickname' : '',
                'email' : '',
                'group' : [],
                'guest' : '1'
                }

        @param recID: the id of a record
        @param ln: the language in which the record has to be formatted
        @param search_pattern: list of string representing the request used by the user in web interface
        @param xml_record: a xml string of the record to format
        @param user_info: the information of the user who will view the formatted page
        @param output_format: the output_format used for formatting this record
        """
        self.xml_record = None # *Must* remain empty if recid is given
        if xml_record is not None:
            # If record is given as parameter, parse it and take the record id
            # from its '001' controlfield instead of the 'recID' argument.
            # NOTE(review): recID then becomes whatever record_get_field_value
            # returns (presumably a string) — confirm callers accept that.
            self.xml_record = xml_record
            self.record = create_record(xml_record)[0]
            recID = record_get_field_value(self.record, "001")

        self.lang = wash_language(ln)
        if search_pattern is None:
            search_pattern = []
        self.search_pattern = search_pattern
        self.recID = recID
        self.output_format = output_format
        self.user_info = user_info
        if self.user_info is None:
            # Fall back to an anonymous/guest user info object
            from invenio.ext.login.legacy_user import UserInfo
            self.user_info = UserInfo(None)
def get_record(self):
"""
Returns the record structure of this L{BibFormatObject} instance
@return: the record structure as defined by BibRecord library
"""
from invenio.search_engine import get_record
# Create record if necessary
if self.record is None:
# on-the-fly creation if current output is xm
self.record = get_record(self.recID)
return self.record
def control_field(self, tag, escape=0):
"""
Returns the value of control field given by tag in record
@param tag: the marc code of a field
@param escape: 1 if returned value should be escaped. Else 0.
@return: value of field tag in record
"""
if self.get_record() is None:
#Case where BibRecord could not parse object
return ''
p_tag = parse_tag(tag)
field_value = record_get_field_value(self.get_record(),
p_tag[0],
p_tag[1],
p_tag[2],
p_tag[3])
if escape == 0:
return field_value
else:
return escape_field(field_value, escape)
def field(self, tag, escape=0):
"""
Returns the value of the field corresponding to tag in the
current record.
If the value does not exist, return empty string. Else
returns the same as bfo.fields(..)[0] (see docstring below).
'escape' parameter allows to escape special characters
of the field. The value of escape can be:
0. no escaping
1. escape all HTML characters
2. remove unsafe HTML tags (Eg. keep <br />)
3. Mix of mode 1 and 2. If value of field starts with
<!-- HTML -->, then use mode 2. Else use mode 1.
4. Remove all HTML tags
5. Same as 2, with more tags allowed (like <img>)
6. Same as 3, with more tags allowed (like <img>)
7. Mix of mode 0 and mode 1. If field_value
starts with <!--HTML-->, then use mode 0.
Else use mode 1.
8. Same as mode 1, but also escape double-quotes
9. Same as mode 4, but also escape double-quotes
@param tag: the marc code of a field
@param escape: 1 if returned value should be escaped. Else 0. (see above for other modes)
@return: value of field tag in record
"""
list_of_fields = self.fields(tag)
if len(list_of_fields) > 0:
# Escaping below
if escape == 0:
return list_of_fields[0]
else:
return escape_field(list_of_fields[0], escape)
else:
return ""
def fields(self, tag, escape=0, repeatable_subfields_p=False):
"""
Returns the list of values corresonding to "tag".
If tag has an undefined subcode (such as 999C5),
the function returns a list of dictionaries, whoose keys
are the subcodes and the values are the values of tag.subcode.
If the tag has a subcode, simply returns list of values
corresponding to tag.
Eg. for given MARC::
999C5 $a value_1a $b value_1b
999C5 $b value_2b
999C5 $b value_3b $b value_3b_bis
>>> bfo.fields('999C5b')
>>> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis']
>>> bfo.fields('999C5')
>>> [{'a':'value_1a', 'b':'value_1b'},
{'b':'value_2b'},
{'b':'value_3b'}]
By default the function returns only one value for each
subfield (that is it considers that repeatable subfields are
not allowed). It is why in the above example 'value3b_bis' is
not shown for bfo.fields('999C5'). (Note that it is not
defined which of value_3b or value_3b_bis is returned). This
is to simplify the use of the function, as most of the time
subfields are not repeatable (in that way we get a string
instead of a list). You can allow repeatable subfields by
setting 'repeatable_subfields_p' parameter to True. In
this mode, the above example would return:
>>> bfo.fields('999C5b', repeatable_subfields_p=True)
>>> ['value_1b', 'value_2b', 'value_3b']
>>> bfo.fields('999C5', repeatable_subfields_p=True)
>>> [{'a':['value_1a'], 'b':['value_1b']},
{'b':['value_2b']},
{'b':['value_3b', 'value3b_bis']}]
NOTICE THAT THE RETURNED STRUCTURE IS DIFFERENT. Also note
that whatever the value of 'repeatable_subfields_p' is,
bfo.fields('999C5b') always show all fields, even repeatable
ones. This is because the parameter has no impact on the
returned structure (it is always a list).
'escape' parameter allows to escape special characters
of the fields. The value of escape can be:
0. No escaping
1. Escape all HTML characters
2. Remove unsafe HTML tags (Eg. keep <br />)
3. Mix of mode 1 and 2. If value of field starts with
<!-- HTML -->, then use mode 2. Else use mode 1.
4. Remove all HTML tags
5. Same as 2, with more tags allowed (like <img>)
6. Same as 3, with more tags allowed (like <img>)
7. Mix of mode 0 and mode 1. If field_value
starts with <!--HTML-->, then use mode 0.
Else use mode 1.
8. Same as mode 1, but also escape double-quotes
9. Same as mode 4, but also escape double-quotes
@param tag: the marc code of a field
@param escape: 1 if returned values should be escaped. Else 0.
@repeatable_subfields_p if True, returns the list of subfields in the dictionary
@return: values of field tag in record
"""
if self.get_record() is None:
# Case where BibRecord could not parse object
return []
p_tag = parse_tag(tag)
if p_tag[3] != "":
# Subcode has been defined. Simply returns list of values
values = record_get_field_values(self.get_record(),
p_tag[0],
p_tag[1],
p_tag[2],
p_tag[3])
if escape == 0:
return values
else:
return [escape_field(value, escape) for value in values]
else:
# Subcode is undefined. Returns list of dicts.
# However it might be the case of a control field.
instances = record_get_field_instances(self.get_record(),
p_tag[0],
p_tag[1],
p_tag[2])
if repeatable_subfields_p:
list_of_instances = []
for instance in instances:
instance_dict = {}
for subfield in instance[0]:
if not instance_dict.has_key(subfield[0]):
instance_dict[subfield[0]] = []
if escape == 0:
instance_dict[subfield[0]].append(subfield[1])
else:
instance_dict[subfield[0]].append(escape_field(subfield[1], escape))
list_of_instances.append(instance_dict)
return list_of_instances
else:
if escape == 0:
return [dict(instance[0]) for instance in instances]
else:
return [dict([ (subfield[0], escape_field(subfield[1], escape)) \
for subfield in instance[0] ]) \
for instance in instances]
def kb(self, kb, string, default=""):
"""
Returns the value of the "string" in the knowledge base "kb".
If kb does not exist or string does not exist in kb,
returns 'default' string or empty string if not specified.
@param kb: a knowledge base name
@param string: the string we want to translate
@param default: a default value returned if 'string' not found in 'kb'
@return: a string value corresponding to translated input with given kb
"""
if not string:
return default
val = get_kbr_values(kb, searchkey=string, searchtype='e')
try:
return val[0][0]
except:
return default
def _wash_html_value(value, extended_tags):
    """
    Wash 'value' with the HTML washer, keeping only whitelisted tags.

    Shared helper for escape_field() modes 2/3/5/6 (previously duplicated
    inline for each mode pair).

    @param value: the HTML string to wash
    @param extended_tags: if True, additionally allow image/table related
        tags and attributes (used by modes 5 and 6)
    @return: the washed string
    @raise HTMLParseError: if 'value' cannot be parsed as HTML
    """
    allowed_attribute_whitelist = CFG_HTML_BUFFER_ALLOWED_ATTRIBUTE_WHITELIST
    allowed_tag_whitelist = CFG_HTML_BUFFER_ALLOWED_TAG_WHITELIST + \
                            ('class',)
    if extended_tags:
        allowed_attribute_whitelist += ('src', 'alt',
                                        'width', 'height',
                                        'style', 'summary',
                                        'border', 'cellspacing',
                                        'cellpadding')
        allowed_tag_whitelist += ('img', 'table', 'td',
                                  'tr', 'th', 'span', 'caption')
    return washer.wash(value,
                       allowed_attribute_whitelist=allowed_attribute_whitelist,
                       allowed_tag_whitelist=allowed_tag_whitelist)

def escape_field(value, mode=0):
    """
    Utility function used to escape the value of a field in given mode.

      - mode 0: no escaping
      - mode 1: escaping all HTML/XML characters (escaped chars are shown as escaped)
      - mode 2: escaping unsafe HTML tags to avoid XSS, but
                keep basic ones (such as <br />).
                Escaped tags are removed.
      - mode 3: mix of mode 1 and mode 2. If field_value starts with <!--HTML-->,
                then use mode 2. Else use mode 1.
      - mode 4: escaping all HTML/XML tags (escaped tags are removed)
      - mode 5: same as 2, but allows more tags, like <img>
      - mode 6: same as 3, but allows more tags, like <img>
      - mode 7: mix of mode 0 and mode 1. If field_value starts with <!--HTML-->,
                then use mode 0. Else use mode 1.
      - mode 8: same as mode 1, but also escape double-quotes
      - mode 9: same as mode 4, but also escape double-quotes

    @param value: value to escape
    @param mode: escaping mode to use
    @return: an escaped version of X{value} according to chosen X{mode}
    """
    if mode == 1:
        return cgi.escape(value)
    elif mode == 8:
        return cgi.escape(value, True)
    elif mode in [2, 5]:
        try:
            return _wash_html_value(value, extended_tags=(mode == 5))
        except HTMLParseError:
            # Parsing failed: fall back to full escaping
            return cgi.escape(value)
    elif mode in [3, 6]:
        if value.lstrip(' \n').startswith(html_field):
            try:
                return _wash_html_value(value, extended_tags=(mode == 6))
            except HTMLParseError:
                # Parsing failed: fall back to full escaping
                return cgi.escape(value)
        else:
            return cgi.escape(value)
    elif mode in [4, 9]:
        try:
            out = washer.wash(value,
                              allowed_attribute_whitelist=[],
                              allowed_tag_whitelist=[]
                              )
            if mode == 9:
                out = out.replace('"', '&quot;')
            return out
        except HTMLParseError:
            # Parsing failed
            if mode == 4:
                return cgi.escape(value)
            else:
                return cgi.escape(value, True)
    elif mode == 7:
        if value.lstrip(' \n').startswith(html_field):
            return value
        else:
            return cgi.escape(value)
    else:
        # mode 0 or any unrecognised mode: return value unchanged
        return value
def bf_profile():
    """
    Run a small benchmark: format records 1 through 50 in the "HD"
    output format with maximum verbosity.

    @return: None
    """
    for recid in range(1, 51):
        format_record(recid, "HD", ln=CFG_SITE_LANG, verbose=9,
                      search_pattern=[])
if __name__ == "__main__":
    # Script entry point: profile the benchmark run (bf_profile formats 50
    # records) and print timing statistics sorted by cumulative time.
    import profile
    import pstats
    #bf_profile()
    profile.run('bf_profile()', "bibformat_profile")
    p = pstats.Stats("bibformat_profile")
    p.strip_dirs().sort_stats("cumulative").print_stats()
diff --git a/invenio/modules/formatter/engines/bfx.py b/invenio/modules/formatter/engines/bfx.py
index 4ed29b260..4ae6a1b7c 100644
--- a/invenio/modules/formatter/engines/bfx.py
+++ b/invenio/modules/formatter/engines/bfx.py
@@ -1,1388 +1,1388 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BFX formatting engine.
For API: see format_with_bfx() docstring below.
"""
__revision__ = "$Id$"
import re
import logging
import copy as p_copy
from xml.dom import minidom, Node
from xml.sax import saxutils
from invenio.modules.formatter.engine import BibFormatObject, get_format_element, eval_format_element
from invenio.bibformat_bfx_engine_config import CFG_BIBFORMAT_BFX_LABEL_DEFINITIONS, CFG_BIBFORMAT_BFX_TEMPLATES_PATH
from invenio.bibformat_bfx_engine_config import CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION, CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE
from invenio.bibformat_bfx_engine_config import InvenioBibFormatBfxError, InvenioBibFormatBfxWarning
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.base.i18n import gettext_set_language
from invenio.config import CFG_SITE_LANG
# Regex for BFX label addresses of the form "[parent:][/]tag[/code][#reg]".
# The named groups 'parent', 'tag', 'code' and 'reg' are consumed by
# MARCTranslator when it compiles the label definitions.
address_pattern = r'(?P<parent>[a-z_]*):?/?(?P<tag>[0-9_?\w]*)/?(?P<code>[\w_?]?)#?(?P<reg>.*)'
def format_with_bfx(recIDs, out_file, template_name, preprocess=None):
    '''
    Format a set of records according to a BFX template.
    This is the main entry point to the BFX engine.

    @param recIDs: a list of record IDs to format
    @param out_file: an object to write in; this can be every object which has a 'write' method: file, req, StringIO
    @param template_name: the file name of the BFX template without the path and the .bfx extension
    @param preprocess: an optional function; every record is passed through this function for initial preprocessing before formatting
    '''
    translator = MARCTranslator(CFG_BIBFORMAT_BFX_LABEL_DEFINITIONS)
    translator.set_record_ids(recIDs, preprocess)
    engine = BFXParser(translator)
    template_tree = engine.load_template(template_name)
    engine.walk(template_tree, out_file)
    return None
class BFXParser:
    '''
    A general-purpose parser for generating xml/xhtml/text output based on a template system.
    Must be initialised with a translator. A translator is like a blackbox that returns values, calls functions, etc...
    Works with every translator supporting the following simple interface:
        - is_defined(name)
        - get_value(name)
        - iterator(name)
        - call_function(func_name, list_of_parameters)
    Customized for MARC to XML conversion through the use of a MARCTranslator.

    Templates are strict XML files. They are built by combining any tags with the
    special BFX tags living in the http://invenio-software.org/ namespace.
    Easily extensible by tags of your own.
    Defined tags:
        - template: defines a template
        - template_ref: a reference to a template
        - loop structure
        - if, then, elif, else structure
        - text: output text
        - field: query translator for field 'name'
        - element: call external functions

    The parser works in two passes over the same DOM tree: a "test" pass
    (flags['exec'] == False) that registers templates/formats, and an
    "exec" pass (flags['exec'] == True) that produces output.
    '''
    def __init__(self, translator):
        '''
        Create an instance of the BFXParser class. Initialize with a translator.
        The BFXparser makes queries to the translator for the values of certain names.
        For the communication it uses the following translator methods:
            - is_defined(name)
            - iterator(name)
            - get_value(name, [display_specifier])
        @param translator: the translator used by the class instance
        '''
        self.translator = translator
        self.known_operators = ['style', 'format', 'template', 'template_ref', 'text', 'field', 'element', 'loop', 'if', 'then', 'else', 'elif']
        self.flags = {} # store flags here;
        self.templates = {} # store templates and formats here
        self.start_template_name = None #the name of the template from which the 'execution' starts;
        #this is usually a format or the only template found in a doc

    def load_template(self, template_name, template_source=None):
        '''
        Load a BFX template file.
        A template file can have one of two forms:
            - it is a file with a single template. Root tag is 'template'.
              In an API call the single template element is 'executed'.
            - it is a 'style' file which contains exactly one format and zero or more templates. Root tag is 'style' with children 'format' and 'template'(s).
              In this case only the format code is 'executed'. Naturally, in it, it would have references to other templates in the document.
        Template can be given by name (in that case search path is in
        standard directory for bfx template) or directly using the template source.
        If given, template_source overrides template_name
        @param template_name: the name of the BFX template, the same as the name of the filename without the extension
        @return: a DOM tree of the template
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        if template_source is None:
            template_file_name = CFG_BIBFORMAT_BFX_TEMPLATES_PATH + '/' + template_name + '.' + CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION
            #load document
            doc = minidom.parse(template_file_name)
        else:
            doc = minidom.parseString(template_source)
        #set exec flag to false and walk document to find templates and formats
        self.flags['exec'] = False
        self.walk(doc)
        #check found templates
        if self.start_template_name:
            start_template = self.templates[self.start_template_name]['node']
        else:
            #print CFG_BIBFORMAT_BFX_WARNING_MESSAGES['WRN_BFX_NO_FORMAT_FOUND']
            if len(self.templates) == 1:
                # no format found, check if there is a default template
                self.start_template_name = self.templates.keys()[0]
                start_template = self.templates[self.start_template_name]['node']
            else:
                #no formats found, templates either zero or more than one
                if len(self.templates) > 1:
                    try:
                        raise InvenioBibFormatBfxError(_('More than one templates found in the document. No format found.'))
                    except InvenioBibFormatBfxError, exc:
                        register_exception()
                        logging.error(exc.message)
                return None
        # switch to exec mode: subsequent walk() calls produce output
        self.flags['exec'] = True
        return start_template

    def parse_attribute(self, expression):
        '''
        A function to check if an expression is of the special form [!name:display].
        A short form for saying <bx:field name="name" display="tag">, used in element attributes.
        @param expression: a string, usually taken from an attribute value
        @return: if the string is special, parse it and return the corresponding value; else return the initial expression
        '''
        output = expression
        pattern = '\[!(?P<tmp>[\w_.:]*)\]'
        expr = re.compile(pattern)
        match = expr.match(expression)
        if match:
            tmp = match.group('tmp')
            tmp = tmp.split(':')
            var = tmp[0]
            display = ''
            if len(tmp) == 2:
                display = tmp[1]
            # translate and XML-escape only values resolved via the
            # translator; a non-matching expression passes through as-is
            output = self.translator.get_value(var, display)
            output = xml_escape(output)
        return output

    def walk(self, parent, out_file=None):
        '''
        Walk a template DOM tree.
        The main function in the parser. It is recursively called until all the nodes are processed.
        This function is used in two different ways:
            - for initial loading of the template (and validation)
            - for 'execution' of a format/template
        The different behaviour is achieved through the use of flags, which can be set to True or False.
        @param parent: a node to process; in an API call this is the root node
        @param out_file: an object to write to; must have a 'write' method
        @return: None
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        for node in parent.childNodes:
            if node.nodeType == Node.TEXT_NODE:
                value = get_node_value(node)
                value = value.strip()
                if out_file:
                    out_file.write(value)
            if node.nodeType == Node.ELEMENT_NODE:
                #get values
                name, attributes, element_namespace = get_node_name(node), get_node_attributes(node), get_node_namespace(node)
                # write values
                if element_namespace != CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE:
                    # ordinary (non-BFX) element: copy it to the output,
                    # expanding special [!name:display] attribute values
                    #parse all the attributes
                    for key in attributes.keys():
                        attributes[key] = self.parse_attribute(attributes[key])
                    if node_has_subelements(node):
                        if out_file:
                            out_file.write(create_xml_element(name=name, attrs=attributes, element_type=xmlopen))
                        self.walk(node, out_file) #walk subnodes
                        if out_file:
                            out_file.write(create_xml_element(name=name, element_type=xmlclose))
                    else:
                        if out_file:
                            out_file.write(create_xml_element(name=name, attrs=attributes, element_type=xmlempty))
                #name is a special name, must fall in one of the next cases:
                elif node.localName == 'style':
                    self.ctl_style(node, out_file)
                elif node.localName == 'format':
                    self.ctl_format(node, out_file)
                elif node.localName == 'template':
                    self.ctl_template(node, out_file)
                elif node.localName == 'template_ref':
                    self.ctl_template_ref(node, out_file)
                elif node.localName == 'element':
                    self.ctl_element(node, out_file)
                elif node.localName == 'field':
                    self.ctl_field(node, out_file)
                elif node.localName == 'text':
                    self.ctl_text(node, out_file)
                elif node.localName == 'loop':
                    self.ctl_loop(node, out_file)
                elif node.localName == 'if':
                    self.ctl_if(node, out_file)
                elif node.localName == 'then':
                    self.ctl_then(node, out_file)
                elif node.localName == 'else':
                    self.ctl_else(node, out_file)
                elif node.localName == 'elif':
                    self.ctl_elif(node, out_file)
                else:
                    if node.localName in self.known_operators:
                        try:
                            raise InvenioBibFormatBfxError(_('Note for programmer: you have not implemented operator %s.') % name)
                        except InvenioBibFormatBfxError, exc:
                            register_exception()
                            logging.error(exc.message)
                        # print 'Note for programmer: you haven\'t implemented operator %s.' % (name)
                    else:
                        try:
                            raise InvenioBibFormatBfxError(_('Name %s is not recognised as a valid operator name.') % name)
                        except InvenioBibFormatBfxError, exc:
                            register_exception()
                            logging.error(exc.message)
        return None

    def ctl_style(self, node, out_file):
        '''
        Process a style root node.
        '''
        #exec mode
        if self.flags['exec']:
            return None
        #test mode
        self.walk(node, out_file)
        return None

    def ctl_format(self, node, out_file):
        '''
        Process a format node.
        Get name, description and content attributes.
        This function is called only in test mode.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #exec mode
        if self.flags['exec']:
            return None
        #test mode
        attrs = get_node_attributes(node)
        #get template name and give control to ctl_template
        if attrs.has_key('name'):
            name = attrs['name']
            if self.templates.has_key(name):
                try:
                    raise InvenioBibFormatBfxError(_('Duplicate name: %s.') % name)
                except InvenioBibFormatBfxError, exc:
                    register_exception()
                    logging.error(exc.message)
                return None
            # the format's name marks the 'execution' entry point
            self.start_template_name = name
            self.ctl_template(node, out_file)
        else:
            try:
                raise InvenioBibFormatBfxError(_('No name defined for the template.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        return None

    def ctl_template(self, node, out_file):
        '''
        Process a template node.
        Get name, description and content attributes.
        Register name and store for later calls from template_ref.
        This function is called only in test mode.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #exec mode
        if self.flags['exec']:
            return None
        #test mode
        attrs = get_node_attributes(node)
        #get template name
        if attrs.has_key('name'):
            name = attrs['name']
            if self.templates.has_key(name):
                try:
                    raise InvenioBibFormatBfxError(_('Duplicate name: %s.') % name)
                except InvenioBibFormatBfxError, exc:
                    register_exception()
                    logging.error(exc.message)
                return None
            self.templates[name] = {}
            self.templates[name]['node'] = node
        else:
            try:
                raise InvenioBibFormatBfxError(_('No name defined for the template.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        #get template description
        if attrs.has_key('description'):
            description = attrs['description']
        else:
            description = ''
            try:
                raise InvenioBibFormatBfxWarning(_('No description entered for the template.'))
            except InvenioBibFormatBfxWarning, exc:
                register_exception(stream='warning')
                logging.warning(exc.message)
        self.templates[name]['description'] = description
        #get content-type of resulting output
        if attrs.has_key('content'):
            content_type = attrs['content']
        else:
            content_type = 'text/xml'
            try:
                raise InvenioBibFormatBfxWarning(_('No content type specified for the template. Using default: text/xml.'))
            except InvenioBibFormatBfxWarning, exc:
                register_exception(stream='warning')
                logging.warning(exc.message)
        self.templates[name]['content_type'] = content_type
        #walk node
        self.walk(node, out_file)
        return None

    def ctl_template_ref(self, node, out_file):
        '''
        Reference to an external template.
        This function is called only in execution mode. Bad references appear as run-time errors.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            return None
        #exec mode
        attrs = get_node_attributes(node)
        if not attrs.has_key('name'):
            try:
                raise InvenioBibFormatBfxError(_('Missing attribute "name" in TEMPLATE_REF.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        name = attrs['name']
        #first check for a template in the same file, that is in the already cached templates
        if self.templates.has_key(name):
            node_to_walk = self.templates[name]['node']
            self.walk(node_to_walk, out_file)
        else:
            #load a file and execute it
            # NOTE(review): loading external template files is not
            # implemented -- unknown references are silently ignored.
            pass
            #template_file_name = CFG_BIBFORMAT_BFX_TEMPLATES_PATH + name + '/' + CFG_BIBFORMAT_BFX_FORMAT_TEMPLATE_EXTENSION
            #try:
            #    node = minidom.parse(template_file_name)
            #except:
            #    print CFG_BIBFORMAT_BFX_ERROR_MESSAGES['ERR_BFX_TEMPLATE_NOT_FOUND'] % (template_file_name)
        return None

    def ctl_element(self, node, out_file):
        '''
        Call an external element (written in Python).
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            return None
        #exec mode
        parameters = get_node_attributes(node)
        if not parameters.has_key('name'):
            try:
                raise InvenioBibFormatBfxError(_('Missing attribute "name" in ELEMENT.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        function_name = parameters['name']
        del parameters['name']
        #now run external bfe_name.py, with param attrs
        if function_name:
            value = self.translator.call_function(function_name, parameters)
            value = xml_escape(value)
            out_file.write(value)
        return None

    def ctl_field(self, node, out_file):
        '''
        Get the value of a field by its name.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            return None
        #exec mode
        attrs = get_node_attributes(node)
        if not attrs.has_key('name'):
            try:
                raise InvenioBibFormatBfxError(_('Missing attribute "name" in FIELD.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        display = ''
        if attrs.has_key('display'):
            display = attrs['display']
        var = attrs['name']
        if not self.translator.is_defined(var):
            try:
                raise InvenioBibFormatBfxError(_('Field %s is not defined.') % var)
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        value = self.translator.get_value(var, display)
        value = xml_escape(value)
        out_file.write(value)
        return None

    def ctl_text(self, node, out_file):
        '''
        Output a text
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            return None
        #exec mode
        attrs = get_node_attributes(node)
        if not attrs.has_key('value'):
            try:
                raise InvenioBibFormatBfxError(_('Missing attribute "value" in TEXT.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        value = attrs['value']
        # turn the two-character sequence backslash-n into a real newline
        value = value.replace(r'\n', '\n')
        #value = xml_escape(value)
        if type(value) == type(u''):
            value = value.encode('utf-8')
        out_file.write(value)
        return None

    def ctl_loop(self, node, out_file):
        '''
        Loop through a set of values.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            self.walk(node, out_file)
            return None
        #exec mode
        attrs = get_node_attributes(node)
        if not attrs.has_key('object'):
            try:
                raise InvenioBibFormatBfxError(_('Missing attribute "object" in LOOP.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        name = attrs['object']
        if not self.translator.is_defined(name):
            try:
                raise InvenioBibFormatBfxError(_('Field %s is not defined.') % name)
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        # the iterator advances the translator's state; the loop body
        # re-reads values through the translator on each pass
        for new_object in self.translator.iterator(name):
            self.walk(node, out_file)
        return None

    def ctl_if(self, node, out_file):
        '''
        An if/then/elif/.../elif/else construct.
        'If' can have several forms:
          <if name="var"/>                  : True if var is non-empty, eval as string
          <if name="var" eq="value"/>       : True if var=value, eval as string
          <if name="var" lt="value"/>       : True if var<value, try to eval as num, else eval as string
          <if name="var" gt="value"/>       : True if var>value, try to eval as num, else eval as string
          <if name="var" le="value"/>       : True if var<=value, try to eval as num, else eval as string
          <if name="var" ge="value"/>       : True if var>=value, try to eval as num, else eval as string
          <if name="var" in="val1 val2"/>   : True if var in [val1, val2], eval as string
          <if name="var" nin="val1 val2"/>  : True if var not in [val1, val2], eval as string
          <if name="var" neq="value"/>      : True if var!=value, eval as string
          <if name="var" like="regexp"/>    : Match against a regular expression

        Example::
          <if name="author" eq="Pauli">
            <then>Pauli</then>
            <elif name="" eq="Einstein">
              <then>Pauli</then>
              <else>other</else>
            </elif>
          </if>
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            self.walk(node, out_file)
            return None
        #exec mode
        attrs = get_node_attributes(node)
        if not attrs.has_key('name'):
            try:
                raise InvenioBibFormatBfxError(_('Missing attrbute "name" in IF.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        #determine result
        var = attrs['name']
        if not self.translator.is_defined(var):
            try:
                raise InvenioBibFormatBfxError(_('Field %s is not defined.') % var)
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        value = self.translator.get_value(var)
        value = value.strip()
        #equal
        if attrs.has_key('eq'):
            pattern = attrs['eq']
            if is_number(pattern) and is_number(value):
                result = (float(value)==float(pattern))
            else:
                result = (value==pattern)
        #not equal
        elif attrs.has_key('neq'):
            pattern = attrs['neq']
            if is_number(pattern) and is_number(value):
                result = (float(value)!=float(pattern))
            else:
                result = (value!=pattern)
        #lower than
        elif attrs.has_key('lt'):
            pattern = attrs['lt']
            if is_number(pattern) and is_number(value):
                result = (float(value)<float(pattern))
            else:
                result = (value<pattern)
        #greater than
        elif attrs.has_key('gt'):
            pattern = attrs['gt']
            if is_number(pattern) and is_number(value):
                result = (float(value)>float(pattern))
            else:
                result = (value>pattern)
        #lower or equal than
        elif attrs.has_key('le'):
            pattern = attrs['le']
            if is_number(pattern) and is_number(value):
                result = (float(value)<=float(pattern))
            else:
                result = (value<=pattern)
        #greater or equal than
        elif attrs.has_key('ge'):
            pattern = attrs['ge']
            if is_number(pattern) and is_number(value):
                result = (float(value)>=float(pattern))
            else:
                result = (value>=pattern)
        #in
        elif attrs.has_key('in'):
            pattern = attrs['in']
            values = pattern.split()
            result = (value in values)
        #not in
        elif attrs.has_key('nin'):
            pattern = attrs['nin']
            values = pattern.split()
            result = (value not in values)
        #match against a regular expression
        elif attrs.has_key('like'):
            pattern = attrs['like']
            try:
                expr = re.compile(pattern)
                result = expr.match(value)
            except:
                # NOTE(review): if the pattern is invalid, 'result' is left
                # unbound here and the code below raises NameError — confirm
                # intended behavior before relying on this path.
                try:
                    raise InvenioBibFormatBfxError(_('Invalid regular expression: %s.') % pattern)
                except InvenioBibFormatBfxError, exc:
                    register_exception()
                    logging.error(exc.message)
        #simple form: True if non-empty, otherwise False
        else:
            result = value
        #end of evaluation
        #=================
        #validate subnodes
        then_node = get_node_subelement(node, 'then', CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE)
        else_node = get_node_subelement(node, 'else', CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE)
        elif_node = get_node_subelement(node, 'elif', CFG_BIBFORMAT_BFX_ELEMENT_NAMESPACE)
        #having else and elif siblings at the same time is a syntax error
        if (else_node is not None) and (elif_node is not None):
            try:
                raise InvenioBibFormatBfxError(_('Invalid syntax of IF statement.'))
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
            return None
        #now walk appropriate nodes, according to the result
        if result: #True
            if then_node:
                self.walk(then_node, out_file)
            #todo: add short form, without 'then', just elements within if statement to walk on 'true' and no 'elif' or 'else' elements
        else: #False
            if elif_node:
                self.ctl_if(elif_node, out_file)
            elif else_node:
                self.walk(else_node, out_file)
        return None

    def ctl_then(self, node, out_file):
        '''
        Calling 'then' directly from the walk function means a syntax error.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            self.walk(node, out_file)
            return None
        #exec mode
        try:
            raise InvenioBibFormatBfxError(_('Invalid syntax of IF statement.'))
        except InvenioBibFormatBfxError, exc:
            register_exception()
            logging.error(exc.message)
        return None

    def ctl_else(self, node, out_file):
        '''
        Calling 'else' directly from the walk function means a syntax error.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            self.walk(node, out_file)
            return None
        #exec mode
        try:
            raise InvenioBibFormatBfxError(_('Invalid syntax of IF statement.'))
        except InvenioBibFormatBfxError, exc:
            register_exception()
            logging.error(exc.message)
        return None

    def ctl_elif(self, node, out_file):
        '''
        Calling 'elif' directly from the walk function means a syntax error.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        #test mode
        if not self.flags['exec']:
            self.walk(node, out_file)
            return None
        #exec mode
        try:
            raise InvenioBibFormatBfxError(_('Invalid syntax of IF statement.'))
        except InvenioBibFormatBfxError, exc:
            register_exception()
            logging.error(exc.message)
        return None
class MARCTranslator:
    '''
    Translate MARC record data into values for bfx template variables.

    Internal state, per defined variable name:
    memory[name]
    [name]['addresses'] - the set of rules for each of the defined names
    [name]['parent'] - the name of the parent; '' if none;
    [name]['children'] - a list with the name of the children of every variable
    [name]['object'] - stored state of object for performance efficiency
    '''
    def __init__(self, labels=None):
        '''
        Create an instance of the translator and init with the list of the defined labels and their rules.
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        if labels is None:
            labels = {}
        self.recIDs = []
        self.recID = 0
        self.recID_index = 0
        self.record = None
        # NOTE(review): self.preprocess is first assigned in set_record_ids();
        # methods using it assume set_record_ids() was called beforehand.
        self.memory = {}
        pattern = address_pattern
        expr = re.compile(pattern)
        # first pass: register every label with empty relations
        for name in labels.keys():
            self.memory[name] = {}
            self.memory[name]['object'] = None
            self.memory[name]['parent'] = ''
            self.memory[name]['children'] = []
            self.memory[name]['addresses'] = p_copy.deepcopy(labels[name])
        # second pass: validate addresses and wire up parent/child links
        for name in self.memory:
            for i in range(len(self.memory[name]['addresses'])):
                address = self.memory[name]['addresses'][i]
                match = expr.match(address)
                if not match:
                    try:
                        raise InvenioBibFormatBfxError(_('Invalid address: %s %s') % (name, address))
                    except InvenioBibFormatBfxError, exc:
                        register_exception()
                        logging.error(exc.message)
                    # print 'Invalid address: ', name, address
                else:
                    parent_name = match.group('parent')
                    if parent_name:
                        if not self.memory.has_key(parent_name):
                            try:
                                raise InvenioBibFormatBfxError(_('Field %s is not defined.') % parent_name)
                            except InvenioBibFormatBfxError, exc:
                                register_exception()
                                logging.error(exc.message)
                        else:
                            self.memory[name]['parent'] = parent_name
                            #now make parent aware of children
                            if not name in self.memory[parent_name]['children']:
                                self.memory[parent_name]['children'].append(name)
                            level = self.determine_level(parent_name)
                            # rewrite the parent prefix of the address into
                            # '/' markers, one per nesting level
                            self.memory[name]['addresses'][i] = self.memory[name]['addresses'][i].replace(parent_name, '/'*level)
        #special case 'record'
        self.memory['record'] = {}
        self.memory['record']['object'] = None
        self.memory['record']['parent'] = ''
        self.memory['record']['children'] = []
    def set_record_ids(self, recIDs, preprocess=None):
        '''
        Initialize the translator with the set of record IDs.
        @param recIDs: a list of the record IDs
        @param preprocess: an optional function which acts on every record structure after creating it
        This can be used to enrich the record with fields not present in the record initially,
        verify the record data or whatever plausible.
        Another solution is to use external function elements.
        '''
        self.record = None
        self.recIDs = recIDs
        self.preprocess = preprocess
        if self.recIDs:
            # eagerly load the first record so get_value() works before
            # the 'record' iterator has been entered
            self.recID_index = 0
            self.recID = self.recIDs[self.recID_index]
            self.record = get_bfx_record(self.recID)
            if self.preprocess:
                self.preprocess(self.record)
        return None
    def determine_level(self, name):
        '''
        Determine the type of the variable, whether this is an instance or a subfield.
        This is done by observing the first provided address for the name.
        todo: define variable types in config file, remove this function, results in a clearer concept
        '''
        level = 0 #default value
        if self.memory.has_key(name):
            expr = re.compile(address_pattern)
            if self.memory[name]['addresses']:
                # only the first address is inspected
                match = expr.match(self.memory[name]['addresses'][0])
                if match:
                    tag = match.group('tag')
                    code = match.group('code')
                    reg = match.group('reg')
                    if reg:
                        level = 2 #subfield
                    elif code:
                        level = 2 #subfield
                    elif tag:
                        level = 1 #instance
        return level
    #========================================
    #API functions for quering the translator
    #========================================
    def is_defined(self, name):
        '''
        Check whether a variable is defined.
        @param name: the name of the variable
        '''
        return self.memory.has_key(name)
    def get_num_elements(self, name):
        '''
        An API function to get the number of elements for a variable.
        Do not use this function to build loops, Use iterator instead.
        '''
        if name == 'record':
            return len(self.recIDs)
        num = 0
        for part in self.iterator(name):
            num = num + 1
        return num
    def get_value(self, name, display_type='value'):
        '''
        The API function for quering the translator for values of a certain variable.
        Called in a loop will result in a different value each time.
        Objects are cached in memory, so subsequent calls for the same variable take less time.
        @param name: the name of the variable you want the value of
        @param display_type: an optional value for the type of the desired output, one of: value, tag, ind1, ind2, code, fulltag;
        These can be easily added in the proper place of the code (display_value)
        '''
        if name == 'record':
            return ''
        record = self.get_object(name)
        return self.display_record(record, display_type)
    def iterator(self, name):
        '''
        An iterator over the values of a certain name.
        The iterator changes state of internal variables and objects.
        When calling get_value in a loop, this will result each time in a different value.
        '''
        if name == 'record':
            for self.recID in self.recIDs:
                self.record = get_bfx_record(self.recID)
                if self.preprocess:
                    self.preprocess(self.record)
                yield str(self.recID)
        else:
            full_object = self.build_object(name)
            level = self.determine_level(name)
            for new_object in record_parts(full_object, level):
                self.memory[name]['object'] = new_object
                #parent has changed state; also set childs state to None;
                for children_name in self.memory[name]['children']:
                    self.memory[children_name]['object'] = None
                yield new_object
            #the result for a call of the same name after an iterator should be the same as if there was no iterator called before
            self.memory[name]['object'] = None
    def call_function(self, function_name, parameters=None):
        '''
        Call an external element which is a Python file, using BibFormat
        @param function_name: the name of the function to call
        @param parameters: a dictionary of the parameters to pass as key=value pairs
        @return: a string value, which is the result of the function call
        '''
        if parameters is None:
            parameters = {}
        bfo = BibFormatObject(self.recID)
        format_element = get_format_element(function_name)
        (value, dummy) = eval_format_element(format_element, bfo, parameters)
        #to do: check errors from function call
        return value
    #========================================
    #end of API functions
    #========================================
    def get_object(self, name):
        '''
        Responsible for creating the desired object, corresponding to provided name.
        If object is not cached in memory, it is build again.
        Directly called by API function get_value.
        The result is then formatted by display_record according to display_type.
        '''
        if self.memory[name]['object'] is not None:
            return self.memory[name]['object']
        new_object = self.build_object(name)
        #if you have reached here you are not in an iterator; return first non-empty
        level = self.determine_level(name)
        for tmp_object in record_parts(new_object, level):
            #get the first non-empty
            if tmp_object:
                new_object = tmp_object
                break
        self.memory[name]['object'] = new_object
        return new_object
    def build_object(self, name):
        '''
        Build the object from the list of addresses
        A slave function for get_object.
        '''
        new_object = {}
        parent_name = self.memory[name]['parent'];
        has_parent = parent_name
        for address in self.memory[name]['addresses']:
            if not has_parent:
                # top-level variable: filter directly from the current record
                tmp_object = copy(self.record, address)
                new_object = merge(new_object, tmp_object)
            else: #has parent
                parent_object = self.get_object(parent_name) #already returns the parents instance
                tmp_object = copy(parent_object, address)
                new_object = merge(new_object, tmp_object)
        return new_object
    def display_record(self, record, display_type='value'):
        '''
        Decide what the final output value is according to the display_type.
        @param record: the record structure to display; this is most probably just a single subfield
        @param display_type: a string specifying the desired output; can be one of: value, tag, ind1, ind2, code, fulltag
        @return: a string to output
        '''
        _ = gettext_set_language(CFG_SITE_LANG)
        output = ''
        tag, ind1, ind2, code, value = '', '', '', '', ''
        if record:
            # pick the first tag / first instance / first code in sorted order
            tags = record.keys()
            tags.sort()
            if tags:
                fulltag = tags[0]
                tag, ind1, ind2 = fulltag[0:3], fulltag[3:4], fulltag[4:5]
                field_instances = record[fulltag]
                if field_instances:
                    field_instance = field_instances[0]
                    codes = field_instance.keys()
                    codes.sort()
                    if codes:
                        code = codes[0]
                        value = field_instance[code]
        if not display_type:
            display_type = 'value'
        if display_type == 'value':
            output = value
        elif display_type == 'tag':
            output = tag
        elif display_type == 'ind1':
            # '_' is the internal placeholder for a blank indicator
            ind1 = ind1.replace('_', ' ')
            output = ind1
        elif display_type=='ind2':
            ind2 = ind2.replace('_', ' ')
            output = ind2
        elif display_type == 'code':
            output = code
        elif display_type == 'fulltag':
            output = tag + ind1 + ind2
        else:
            try:
                raise InvenioBibFormatBfxError(_('Invalid display type. Must be one of: value, tag, ind1, ind2, code; received: %s.') % display_type)
            except InvenioBibFormatBfxError, exc:
                register_exception()
                logging.error(exc.message)
        return output
'''
Functions for use with the structure representing a MARC record defined here.
This record structure differs from the one defined in bibrecord.
The reason is that we want a symmetry between controlfields and datafields.
In this format controlfields are represented internally as a subfield value with code ' ' of a datafield.
This allows for easier handling of the fields.
However, there is a restriction associated with this structure and it is that subfields cannot be repeated
in the same instance. If this is the case, the result will be incorrect.
The record structure has the form:
fields={field_tag:field_instances}
field_instances=[field_instance]
field_instance={field_code:field_value}
'''
def convert_record(old_record):
    '''
    Convert a record from the format defined in bibrecord to the format defined here.

    Controlfields (tags 001-010) become a single subfield with code ' ';
    datafields get a 5-character key '<tag><ind1><ind2>' where '_' stands
    for a blank indicator.

    @param old_record: the record as returned from bibrecord.create_record()
    @return: a record of the new form
    '''
    _ = gettext_set_language(CFG_SITE_LANG)
    fields = {}
    for old_tag in sorted(old_record.keys()):
        if int(old_tag) < 11:
            #controlfields: position 3 of the instance tuple holds the value
            fields[old_tag] = [{' ': old_record[old_tag][0][3]}]
        else:
            #datafields
            for old_field_instance in old_record[old_tag]:
                ind1 = old_field_instance[1]
                if not ind1 or ind1 == ' ':
                    ind1 = '_'
                ind2 = old_field_instance[2]
                if not ind2 or ind2 == ' ':
                    ind2 = '_'
                new_tag = old_tag + ind1 + ind2
                new_field_instance = {}
                for old_subfield in old_field_instance[0]:
                    new_code = old_subfield[0]
                    new_value = old_subfield[1]
                    if new_code in new_field_instance:
                        #this structure cannot represent repeated subfield
                        #codes; report it -- the last value wins
                        try:
                            raise InvenioBibFormatBfxError(_('Repeating subfield codes in the same instance!'))
                        except InvenioBibFormatBfxError as exc:
                            register_exception()
                            logging.error(exc.message)
                        # print 'Error: Repeating subfield codes in the same instance!'
                    new_field_instance[new_code] = new_value
                if new_tag not in fields:
                    fields[new_tag] = []
                fields[new_tag].append(new_field_instance)
    return fields
def get_bfx_record(recID):
    '''
    Fetch the record with the given ID and convert it to the local structure.

    @param recID: the ID of the record
    @return: a record in the structure defined here
    '''
    return convert_record(BibFormatObject(recID).get_record())
def print_bfx_record(record):
    '''
    Print a record to standard output, one line per field instance,
    with tags listed in sorted order.  Debugging helper.
    '''
    tags = record.keys()
    tags.sort()
    for tag in tags:
        field_instances = record[tag]
        for field_instance in field_instances:
            print tag, field_instance
def record_fields_value(record, tag, subfield):
    '''
    Return a list of all the field values with a certain tag and subfield code.
    Works on subfield level.

    @param record: a record
    @param tag: a 3 or 5 letter tag; required
    @param subfield: a subfield code; required
    @return: list of values, in instance order; empty if tag/subfield absent
    '''
    output = []
    # dict.get avoids the deprecated has_key() and a double lookup
    for field_instance in record.get(tag, []):
        if subfield in field_instance:
            output.append(field_instance[subfield])
    return output
def record_add_field_instance(record, tag, field_instance):
    '''
    Add a field_instance to the beginning of the instances of a corresponding tag.

    @param record: a record (mutated in place)
    @param tag: a 3 or 5 letter tag; required
    @param field_instance: the field instance to add
    @return: None
    '''
    # prepend; dict.get replaces the deprecated has_key() check
    record[tag] = [field_instance] + record.get(tag, [])
    return None
def record_num_parts(record, level):
    '''
    Count the number of instances or the number of subfields in the whole record.

    @param record: record to consider for counting
    @param level: either 1 or 2
    level=1 - view record on instance level
    level=2 - view record on subfield level
    @return: the number of parts
    '''
    num = 0
    for dummy in record_parts(record, level):
        num = num + 1
    # bug fix: the original version computed num but never returned it,
    # so every caller received None
    return num
def record_parts(record, level):
    '''
    An iterator over the instances or subfields of a record.

    @param record: record to consider for iterating
    @param level: either 1 or 2
    - level=1: iterate over instances
    - level=2: iterate over subfields
    @return: a record structure representing the part (instance or subfield)
    '''
    if level == 1:
        # one yielded record per field instance; subfields shallow-copied
        for tag in sorted(record.keys()):
            for field_instance in record[tag]:
                yield {tag: [dict(field_instance)]}
    if level == 2:
        # one yielded record per subfield, in sorted code order
        for tag in sorted(record.keys()):
            for field_instance in record[tag]:
                for code in sorted(field_instance.keys()):
                    yield {tag: [{code: field_instance[code]}]}
def copy(old_record, address=''):
    '''
    Copy a record by filtering all parts of the old record specified by address
    (A better name for the function is filter.)

    @param old_record: the initial record
    @param address: an address; for examples see bibformat_bfx_engine_config.
    If no address is specified, return the initial record.
    @return: the filtered record
    '''
    if not old_record:
        return {}
    tag_pattern, code_pattern, reg_pattern = '', '', ''
    match = re.compile(address_pattern).match(address)
    if match:
        tag_pattern = match.group('tag')
        code_pattern = match.group('code')
        reg_pattern = match.group('reg')
    # '?' in the address is a single-character wildcard; raw strings keep
    # the regex escapes intact (non-raw '\w' is deprecated in Python 3)
    if tag_pattern:
        tag_pattern = tag_pattern.replace('?', r'[0-9_\w]')
    else:
        tag_pattern = r'.*'
    if code_pattern:
        code_pattern = code_pattern.replace('?', r'[\w ]')
    else:
        code_pattern = r'.*'
    tag_expr = re.compile(tag_pattern)
    code_expr = re.compile(code_pattern)
    new_record = {}
    for tag in old_record.keys():
        tag_match = tag_expr.match(tag)
        # the pattern must consume the whole tag, not just a prefix
        if tag_match and tag_match.end() == len(tag):
            new_field_instances = []
            for old_field_instance in old_record[tag]:
                new_field_instance = {}
                for old_field_code in old_field_instance.keys():
                    if code_expr.match(old_field_code):
                        new_field_instance[old_field_code] = old_field_instance[old_field_code]
                if new_field_instance:
                    new_field_instances.append(new_field_instance)
            if new_field_instances:
                new_record[tag] = new_field_instances
    #in new_record pass all subfields through regexp
    if reg_pattern:
        for tag in new_record:
            for field_instance in new_record[tag]:
                # iterate a snapshot of the keys since values are rewritten
                for field_code in list(field_instance.keys()):
                    field_instance[field_code] = pass_through_regexp(field_instance[field_code], reg_pattern)
    return new_record
def merge(record1, record2):
    '''
    Merge two records.

    @param record1: first record to merge
    @param record2: second record to merge
    @return: the merged record

    NOTE(review): the original docstring claimed conflicting controlfields
    from record2 are ignored, but the code appends them just like
    datafields -- behaviour preserved here; confirm which is intended.
    '''
    new_record = {}
    if record1:
        new_record = copy(record1)
    if not record2:
        return new_record
    for tag in record2.keys():
        #collect non-empty instance copies from record2
        new_field_instances = []
        for old_field_instance in record2[tag]:
            new_field_instance = dict(old_field_instance)
            if new_field_instance:
                new_field_instances.append(new_field_instance)
        if new_field_instances:
            #controlfield (3-char tag) and datafield (5-char tag) tags are
            #appended identically; other lengths are ignored
            if len(tag) in (3, 5):
                if tag not in new_record:
                    new_record[tag] = []
                new_record[tag].extend(new_field_instances)
    return new_record
#======================
#Help functions
#=====================
# output-type constants for create_xml_element
xmlopen = 1
xmlclose = 2
xmlfull = 3
xmlempty = 4

def create_xml_element(name, value='', attrs=None, element_type=xmlfull, level=0):
    '''
    Build an XML element as a utf-8 encoded string.

    @param name: the name of the element
    @param value: the element value; default is ''
    @param attrs: a dictionary with the element attributes
    @param element_type: a constant selecting the shape of the output
        xmlopen  = 1   <element attr="attr_value">
        xmlclose = 2   </element>
        xmlfull  = 3   <element attr="attr_value">value</element>
        xmlempty = 4   <element attr="attr_value"/>
    @param level: indentation level (number of leading spaces)
    @return: a formatted XML string
    '''
    if attrs is None:
        attrs = {}

    def serialized_attrs():
        # attribute text for the full/empty forms, utf-8 encoding
        # unicode values before interpolation
        text = ''
        for attrname in attrs.keys():
            attrvalue = attrs[attrname]
            if type(attrvalue) == type(u''):
                attrvalue = attrvalue.encode('utf-8')
            text += ' %s="%s"' % (attrname, attrvalue)
        return text

    output = ''
    if element_type == xmlempty:
        output += '<' + name + serialized_attrs() + ' />'
    if element_type == xmlfull:
        output += '<' + name + serialized_attrs() + '>' + value + '</' + name + '>'
    if element_type == xmlopen:
        # note: the open form concatenates attribute values verbatim
        attr_text = ''
        for attrname in attrs.keys():
            attr_text += ' ' + attrname + '="' + attrs[attrname] + '"'
        output += '<' + name + attr_text + '>'
    if element_type == xmlclose:
        output += '</' + name + '>'
    output = ' ' * level + output
    if type(output) == type(u''):
        output = output.encode('utf-8')
    return output
def xml_escape(value):
    '''
    Escape the special characters of a string for safe use as XML
    element content or attribute value.

    @param value: the raw string to escape
    @return: the escaped string
    '''
    return saxutils.escape(value)
def xml_unescape(value):
    '''
    Reverse xml_escape: turn XML entities back into plain characters.

    @param value: the escaped string
    @return: the unescaped string
    '''
    return saxutils.unescape(value)
def node_has_subelements(node):
    '''
    Check if a node has any childnodes.
    Checks for element or text child nodes.

    @param node: the DOM node to inspect
    @return: True if such childnodes exist, False otherwise.
    '''
    # the original looped over every child (shadowing the 'node' parameter
    # in the process); short-circuit on the first match instead
    for child in node.childNodes:
        if child.nodeType == Node.ELEMENT_NODE or child.nodeType == Node.TEXT_NODE:
            return True
    return False
def get_node_subelement(parent_node, name, namespace=None):
    '''
    Get the first childnode with specific name and (optional) namespace.

    @param parent_node: the node to check
    @param name: the local name to search
    @param namespace: An optional namespace URI. This is usually a URL: http://invenio-software.org/
    @return: the first matching child node; None otherwise
    '''
    # early return on first match; the original carried an 'output'
    # variable and a duplicated, unreachable return statement
    for child in parent_node.childNodes:
        if (child.nodeType == Node.ELEMENT_NODE
                and child.localName == name
                and child.namespaceURI == namespace):
            return child
    return None
def get_node_value(node):
    '''
    Return the value of a text node, encoded in utf-8.

    @param node: a text node
    @return: the node value as a utf-8 encoded string
    '''
    value = node.nodeValue
    return value.encode('utf-8')
def get_node_namespace(node):
    '''
    Return the namespace URI of an element node.

    @param node: an element node
    @return: the namespace of the node (may be None)
    '''
    return node.namespaceURI
def get_node_name(node):
    '''
    Return the name of an element node.

    @param node: an element node
    @return: a string with the node name
    '''
    return node.nodeName
def get_node_attributes(node):
    '''
    Collect the attributes of an element node into a dictionary.

    @param node: an element node
    @return: a dict mapping attribute name to attribute value
    '''
    result = {}
    attr_map = node.attributes
    for attr_name in attr_map.keys():
        result[attr_name] = attr_map.get(attr_name).nodeValue
    return result
def pass_through_regexp(value, regexp):
    '''
    Filter a value through a regular expression.

    @param value: a string
    @param regexp: a regexp with a group 'value' in it; a pattern without
        a group named 'value' will result in an error when it matches
    @return: the content of group 'value' if the pattern matches, '' otherwise
    '''
    match = re.compile(regexp).match(value)
    if match:
        return match.group('value')
    return ''
def is_number(value):
    '''
    Check if a value can be interpreted as a number.

    @param value: the value to check (string or numeric)
    @return: True if float(value) succeeds, False otherwise
    '''
    try:
        float(value)
    except (ValueError, TypeError):
        # TypeError covers non-numeric, non-string inputs such as None,
        # which previously propagated out of this predicate
        return False
    return True
diff --git a/invenio/modules/formatter/template_context_functions/tfn_get_fulltext_snippets.py b/invenio/modules/formatter/template_context_functions/tfn_get_fulltext_snippets.py
index eedf733c9..f0873b2dc 100644
--- a/invenio/modules/formatter/template_context_functions/tfn_get_fulltext_snippets.py
+++ b/invenio/modules/formatter/template_context_functions/tfn_get_fulltext_snippets.py
@@ -1,54 +1,54 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Template context function to get fulltext snippets via Solr."""
from invenio.config import CFG_WEBSEARCH_FULLTEXT_SNIPPETS
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.modules.formatter.utils import get_pdf_snippets
from invenio.search_engine_utils import get_fulltext_terms_from_search_pattern
from invenio.modules.search.cache import get_pattern_from_cache
def template_context_function(id_bibrec, pattern, qid, current_user):
    """
    Return fulltext snippets for a record, if available.

    @param id_bibrec ID of record
    @param pattern search pattern
    @param qid query id
    @param current_user user object
    @return HTML containing snippet; '' when no snippet applies,
            None when the required inputs are missing
    """
    if not pattern: pattern = get_pattern_from_cache(qid)
    if id_bibrec and pattern and current_user:
        # Requires search in fulltext field
        if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and 'fulltext:' in pattern:
            terms = get_fulltext_terms_from_search_pattern(pattern)
            if terms:
                snippets = ''
                try:
                    snippets = get_pdf_snippets(id_bibrec, terms, current_user).decode('utf8')
                    if snippets: return ' ... ' + snippets + ' ... '
                except:
                    # NOTE(review): bare except deliberately degrades to '' on
                    # any snippet-extraction failure; the failure is logged
                    register_exception()
                return ''
            else:
                return ''
    else:
        return None
diff --git a/invenio/modules/workflows/api.py b/invenio/modules/workflows/api.py
index de93adc4b..79c7465cc 100644
--- a/invenio/modules/workflows/api.py
+++ b/invenio/modules/workflows/api.py
@@ -1,280 +1,280 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibWorkflow API - functions to run workflows
"""
from werkzeug import cached_property
from werkzeug.utils import import_string
from invenio.base.globals import cfg
class InvenioBibWorkflowWorkerUnavailable(Exception):
    """Raised when no worker backend is configured or could be imported."""
    pass
class WorkerBackend(object):
    """
    Lazy proxy around the worker module named by CFG_BIBWORKFLOW_WORKER.

    The concrete worker is imported on first access and cached by
    werkzeug's cached_property.
    """
    @cached_property
    def worker(self):
        try:
            return import_string('invenio.modules.workflows.workers.%s:%s' % (
                cfg['CFG_BIBWORKFLOW_WORKER'], cfg['CFG_BIBWORKFLOW_WORKER']))
        except:
            from invenio.ext.logging import register_exception
            ## Let's report about broken plugins
            register_exception(alert_admin=True)
            # NOTE(review): on import failure this property returns (and
            # caches) None; __call__ then raises
            # InvenioBibWorkflowWorkerUnavailable

    def __call__(self, *args, **kwargs):
        if not self.worker:
            raise InvenioBibWorkflowWorkerUnavailable('No worker configured')
        return self.worker(*args, **kwargs)


# module-level singleton used by the *_delayed API functions
WORKER = WorkerBackend()
def start(workflow_name, data, **kwargs):
    """
    Starts a workflow by given name for specified data *immediately*
    in the current process.

    The name of the workflow to start is considered unique and it is
    equal to the name of a file containing the workflow definition.

    The data passed should be a list of object(s) to run through the
    workflow. For example: a list of dict, JSON string, BibWorkflowObjects
    etc.

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    The workflow engine object generated is returned upon completion.

    @param workflow_name: the workflow name to run. Ex: "my_workflow"
    @type workflow_name: str

    @param data: the data to run through the workflow
    @type data: list of objects/dicts

    @return: BibWorkflowEngine that ran the workflow.
    """
    # local import -- presumably to avoid a circular import at module load;
    # confirm before moving to the top of the file
    from invenio.bibworkflow_worker_engine import run_worker
    return run_worker(workflow_name, data, **kwargs)
def start_delayed(workflow_name, data, **kwargs):
    """
    Starts a *delayed* workflow by using one of the defined workers
    available. For example, enqueueing the execution of the workflow in
    a task queue such as Celery (http://celeryproject.org).

    Otherwise, see documentation of start().

    @param workflow_name: the workflow name to run. Ex: "my_workflow"
    @type workflow_name: str

    @param data: the data to run through the workflow
    @type data: list of objects/dicts

    @return: BibWorkflowEngine that ran the workflow.
    """
    return WORKER().run_worker(workflow_name, data, **kwargs)
def start_by_wid(wid, **kwargs):
    """
    Will re-start given workflow, by workflow uuid (wid),
    from the beginning with the original data given.

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    @param wid: the workflow uuid. Ex: "550e8400-e29b-41d4-a716-446655440000"
    @type wid: string

    @return: BibWorkflowEngine that ran the workflow.
    """
    # local import -- presumably to avoid a circular import at module load
    from invenio.bibworkflow_worker_engine import restart_worker
    return restart_worker(wid, **kwargs)
def start_by_wid_delayed(wid, **kwargs):
    """
    Will re-start given workflow, by workflow uuid (wid),
    from the beginning with the original data given.

    Starts the workflow *delayed* by using one of the defined workers
    available. For example, enqueueing the execution of the workflow in
    a task queue such as Celery (http://celeryproject.org).

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    @param wid: the workflow uuid. Ex: "550e8400-e29b-41d4-a716-446655440000"
    @type wid: string

    @return: BibWorkflowEngine that ran the workflow.
    """
    return WORKER().restart_worker(wid, **kwargs)
def start_by_oids(workflow_name, oids, **kwargs):
    """
    Will start given workflow, by name, using the given
    list of BibWorkflowObject ids (oids) from beginning.

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    @param workflow_name: the workflow name to run. Ex: "my_workflow"
    @type workflow_name: str

    @param oids: list of BibWorkflowObject id's to run.
    @type oids: list of strings/integers

    @return: BibWorkflowEngine that ran the workflow.
    """
    from invenio.modules.workflows.models import BibWorkflowObject
    # resolve the ids to objects, then run them synchronously
    objects = BibWorkflowObject.query.filter(BibWorkflowObject.id.in_(list(oids))).all()

    return start(workflow_name, objects, **kwargs)
def start_by_oids_delayed(workflow_name, oids, **kwargs):
    """
    Will start given workflow, by name, using the given
    list of BibWorkflowObject ids (oids) from beginning.

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    Starts the workflow *delayed* by using one of the defined workers
    available. For example, enqueueing the execution of the workflow in
    a task queue such as Celery (http://celeryproject.org).

    @param workflow_name: the workflow name to run. Ex: "my_workflow"
    @type workflow_name: str

    @param oids: list of BibWorkflowObject id's to run.
    @type oids: list of strings/integers

    @return: BibWorkflowEngine that ran the workflow.
    """
    from invenio.modules.workflows.models import BibWorkflowObject
    # resolve the ids to objects, then enqueue them on the worker
    objects = BibWorkflowObject.query.filter(BibWorkflowObject.id.in_(list(oids))).all()

    return start_delayed(workflow_name, objects, **kwargs)
def continue_oid(oid, start_point="continue_next", **kwargs):
    """
    Continue workflow asociated with object given by object id (oid)
    *immediately* in the current process.
    It can start from previous, current or next task.

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    @param oid: id of BibWorkflowObject to run.
    @type oid: string

    @param start_point: where should the workflow start from? One of:
        * restart_prev: will restart from the previous task
        * continue_next: will continue to the next task
        * restart_task: will restart the current task
    @type start_point: string

    @return: BibWorkflowEngine that ran the workflow
    """
    # local import -- presumably to avoid a circular import at module load
    from invenio.bibworkflow_worker_engine import continue_worker
    return continue_worker(oid, start_point, **kwargs)
def continue_oid_delayed(oid, start_point="continue_next", **kwargs):
    """
    Continue workflow associated with object given by object id (oid).
    It can start from previous, current or next task.

    Special custom keyword arguments can be given to the workflow engine
    in order to pass certain variables to the tasks in the workflow execution,
    such as a taskid from BibSched, the current user etc.

    Starts the workflow *delayed* by using one of the defined workers
    available. For example, enqueueing the execution of the workflow in
    a task queue such as Celery (http://celeryproject.org).

    @param oid: id of BibWorkflowObject to run.
    @type oid: string

    @param start_point: where should the workflow start from? One of:
        * restart_prev: will restart from the previous task
        * continue_next: will continue to the next task
        * restart_task: will restart the current task
    @type start_point: string

    @return: BibWorkflowEngine that ran the workflow
    """
    return WORKER().continue_worker(oid, start_point, **kwargs)
def resume_objects_in_workflow(id_workflow, start_point="continue_next",
                               **kwargs):
    """
    Resume every halted object belonging to the given workflow.

    Generator: one BibWorkflowEngine is yielded per object resumed.
    The original workflow is identified by its ID (or UUID); each of
    its HALTED objects is continued from ``start_point`` (by default
    the next task), with extra keyword arguments forwarded to the
    workflow engine.

    @param id_workflow: id of Workflow with objects to resume.
    @type id_workflow: string

    @param start_point: where should the workflow start from? One of:
        * restart_prev: will restart from the previous task
        * continue_next: will continue to the next task
        * restart_task: will restart the current task
    @type start_point: string

    @yield: BibWorkflowEngine that ran the workflow
    """
    from invenio.modules.workflows.models import BibWorkflowObject
    from invenio.bibworkflow_config import CFG_OBJECT_VERSION

    # Only objects flagged HALTED in this workflow need resuming.
    halted_objects = BibWorkflowObject.query.filter(
        BibWorkflowObject.id_workflow == id_workflow,
        BibWorkflowObject.version == CFG_OBJECT_VERSION.HALTED
    ).all()
    for halted_object in halted_objects:
        yield continue_oid(oid=halted_object.id, start_point=start_point,
                           **kwargs)
diff --git a/invenio/modules/workflows/utils.py b/invenio/modules/workflows/utils.py
index edc6a5962..40b4c123c 100644
--- a/invenio/modules/workflows/utils.py
+++ b/invenio/modules/workflows/utils.py
@@ -1,306 +1,306 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
import re
import redis
from invenio.legacy.bibrecord import create_record
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.ext.sqlalchemy import db
REGEXP_RECORD = re.compile("<record.*?>(.*?)</record>", re.DOTALL)
class InvenioWorkflowDefinitionError(Exception):
    """Raised when no workflow definition is registered under a name."""
    pass
def create_objects(path_to_file):
    """
    Create a BibWorkflowObject for every <record> found in a file.

    @param path_to_file: path of a file containing MARCXML records
    @return: list of BibWorkflowObject instances (data type "bibrecord")
    """
    from invenio.modules.workflows.models import BibWorkflowObject

    list_of_bwo = []
    # ``with`` guarantees the file is closed even if read() raises
    # (the original open()/close() pair leaked the handle on error).
    with open(path_to_file, "r") as record_file:
        records = record_file.read()
    record_xmls = REGEXP_RECORD.findall(records)
    for record_xml in record_xmls:
        # Re-wrap the captured record body before parsing it.
        rec = "<record>" + record_xml + "</record>"
        rec = create_record(rec)[0]
        #check for errors, if record is empty
        bwo = BibWorkflowObject(rec, "bibrecord")
        list_of_bwo.append(bwo)
    return list_of_bwo
def get_workflow_definition(name):
    """
    Look up a registered workflow definition by name.

    @param name: name the workflow was registered under
    @raise InvenioWorkflowDefinitionError: when ``name`` is unknown
    @return: the workflow definition object
    """
    from invenio.bibworkflow_load_workflows import workflows

    # Guard clause instead of if/else: fail fast on unknown names.
    if name not in workflows:
        raise InvenioWorkflowDefinitionError("Cannot find workflow %s"
                                             % (name,))
    return workflows[name]
def determineDataType(data):
    """
    Guess a data-type label for ``data``.

    A dict yields its 'type' entry when present, otherwise 'dict'.
    Anything else is probed with the ``magic`` library for a MIME type;
    on any failure a warning is registered and "" is returned.
    """
    if isinstance(data, dict):
        return data.get('type', 'dict')

    # Non-dict payloads: let libmagic sniff the MIME type.
    try:
        from magic import Magic
        mime_checker = Magic(mime=True)
        return mime_checker.from_buffer(data)  # noqa
    except:
        register_exception(stream="warning", prefix=
                           "BibWorkflowObject.determineDataType:"
                           " Impossible to resolve data type.")
        return ""
## TODO special thanks to http://code.activestate.com/recipes/440514-dictproperty-properties-for-dictionary-attributes/
class dictproperty(object):
    """Property-like descriptor exposing dict-style item access.

    Wraps per-key getter/setter/deleter callables so that
    ``obj.attr[key]`` dispatches to ``fget(obj, key)`` (and likewise
    for assignment and deletion) through the ``_proxy`` helper.
    """

    class _proxy(object):
        # Bound view returned by dictproperty.__get__: forwards item
        # operations on the attribute to the wrapped accessors.

        def __init__(self, obj, fget, fset, fdel):
            self._obj = obj
            self._fget = fget
            self._fset = fset
            self._fdel = fdel

        def __getitem__(self, key):
            try:
                return self._fget(self._obj, key)
            except TypeError:
                # Missing/uncallable accessor: report instead of raising.
                print "can't read item"

        def __setitem__(self, key, value):
            try:
                self._fset(self._obj, key, value)
            except TypeError:
                print "can't set item %s: %s" % (str(key), str(value),)

        def __delitem__(self, key):
            try:
                self._fdel(self._obj, key)
            except TypeError:
                print "can't delete item"

    def __init__(self, fget=None, fset=None, fdel=None, doc=None):
        self._fget = fget
        self._fset = fset
        self._fdel = fdel
        self.__doc__ = doc

    def __get__(self, obj, objtype=None):
        # Class-level access returns the descriptor itself, mirroring
        # the builtin ``property``.
        if obj is None:
            return self
        return self._proxy(obj, self._fget, self._fset, self._fdel)
def create_hp_containers(iSortCol_0=None, sSortDir_0=None,
                         sSearch=None):
    """
    Return the Holding Pen objects (BibWorkflowObjects with a parent),
    optionally filtered by a search string and reversed for descending
    created-date ordering.

    @param iSortCol_0: DataTables column index to sort on (-6 = created)
    @param sSortDir_0: 'asc' or 'desc'
    @param sSearch: free-text filter, ignored when shorter than 4 chars
    @return: a list of BibWorkflowObject instances
    """
    from invenio.modules.workflows.models import BibWorkflowObject

    # NOTE: the previous version created an unused ``redis.Redis()``
    # connection with default settings here; removed as dead code.
    if iSortCol_0:
        iSortCol_0 = int(iSortCol_0)

    bwobject_list = BibWorkflowObject.query.filter(
        BibWorkflowObject.id_parent != 0).all()

    # Apply the free-text filter only when it is specific enough.
    if sSearch and len(sSearch) >= 4:
        filtered = []
        for bwo in bwobject_list:
            extra_data = bwo.get_extra_data()
            if bwo.id_parent == sSearch:
                filtered.append(bwo)
            elif bwo.id_user == sSearch:
                filtered.append(bwo)
            elif sSearch in bwo.id_workflow:
                filtered.append(bwo)
            elif sSearch in extra_data['widget']:
                filtered.append(bwo)
            elif sSearch in extra_data['last_task_name']:
                filtered.append(bwo)
            # NOTE(review): an object matching both a field above and a
            # redis_search field is appended twice — behavior preserved
            # from the original; confirm whether duplicates are intended.
            try:
                if sSearch in extra_data['redis_search']['category']:
                    filtered.append(bwo)
                elif sSearch in extra_data['redis_search']['source']:
                    filtered.append(bwo)
                elif sSearch in extra_data['redis_search']['title']:
                    filtered.append(bwo)
            except KeyError:
                pass
        bwobject_list = filtered

    # Column -6 is the 'created' column: only a reversal is supported.
    if iSortCol_0 == -6:
        if sSortDir_0 == 'desc':
            bwobject_list.reverse()

    return bwobject_list
def redis_create_search_entry(bwobject):
    """
    Index ``bwobject`` in Redis for Holding Pen sorting/filtering.

    Mirrors the object's extra_data["redis_search"] key/value pairs,
    its owner and its last task name into ``holdingpen_sort`` sets.
    """
    redis_server = set_up_redis()
    extra_data = bwobject.get_extra_data()
    object_id = bwobject.id

    # Persist key/value pairs so they are not lost from Redis.
    for field, field_value in extra_data["redis_search"].iteritems():
        field = str(field)
        field_value = str(field_value)
        redis_server.sadd("holdingpen_sort", field)
        redis_server.sadd("holdingpen_sort:%s" % (field,), field_value)
        redis_server.sadd("holdingpen_sort:%s:%s" % (field, field_value,),
                          object_id)

    owner = extra_data['owner']
    redis_server.sadd("holdingpen_sort", "owner")
    redis_server.sadd("holdingpen_sort:owner", owner)
    redis_server.sadd("holdingpen_sort:owner:%s" % (owner,), object_id)
    redis_server.sadd("holdingpen_sort:last_task_name:%s" %
                      (extra_data['last_task_name'],), object_id)
def filter_holdingpen_results(key, *args):
    """Intersect Holding Pen index sets for ``key`` and every extra key.

    Returns the IDs present in all of the ``holdingpen_sort:<key>``
    sets (Redis SINTER)."""
    redis_server = set_up_redis()
    prefixed = ["holdingpen_sort:" + extra for extra in args]
    return redis_server.sinter("holdingpen_sort:" + key, *prefixed)
def get_redis_keys(key=None):
    """Return Holding Pen index members as a list.

    Without ``key``: the registered sort-field names; with ``key``:
    the values recorded for that field."""
    redis_server = set_up_redis()
    if not key:
        return list(redis_server.smembers("holdingpen_sort"))
    return list(redis_server.smembers("holdingpen_sort:%s" % (str(key),)))
def get_redis_values(key):
    """Return the raw set of values recorded for a Holding Pen field."""
    return set_up_redis().smembers("holdingpen_sort:%s" % (str(key),))
def set_up_redis():
    """
    Connect to the Redis server used for Holding Pen data.

    The connection URL is taken from the Flask application's
    ``CACHE_REDIS_URL`` setting, defaulting to
    ``redis://localhost:6379``.

    @return: Redis server object.
    """
    from flask import current_app
    redis_server = redis.Redis.from_url(
        current_app.config.get('CACHE_REDIS_URL', 'redis://localhost:6379')
    )
    return redis_server
def empty_redis():
    """Flush *every* key on the configured Redis server (FLUSHALL),
    not only Holding Pen entries."""
    redis_server = set_up_redis()
    redis_server.flushall()
def sort_bwolist(bwolist, iSortCol_0, sSortDir_0):
    """
    Sort ``bwolist`` in place by the given DataTables column index.

    Column -> attribute mapping: 0 -> id, 4 -> id_workflow,
    5 -> id_user, 6 -> created, 7/8/9 -> version.  Columns 1-3 (and
    any unknown index) are left untouched, matching the previous
    implementation where those branches were commented out.

    @param bwolist: list of BibWorkflowObject-like items
    @param iSortCol_0: column index to sort on
    @param sSortDir_0: 'desc' for descending, anything else ascending
    @return: the same (possibly re-ordered) list object
    """
    # Dispatch table replaces the former 50-line if/elif chain.
    sort_attributes = {0: 'id',
                       4: 'id_workflow',
                       5: 'id_user',
                       6: 'created',
                       7: 'version',
                       8: 'version',
                       9: 'version'}
    attribute = sort_attributes.get(iSortCol_0)
    if attribute is not None:
        bwolist.sort(key=lambda bwo: getattr(bwo, attribute),
                     reverse=(sSortDir_0 == 'desc'))
    return bwolist
def parse_bwids(bwlist):
    """Parse a textual Python list of ids into ASCII-encoded strings.

    ``bwlist`` is a literal such as "['1', '2']"; each element is
    encoded to ASCII."""
    from ast import literal_eval
    return [entry.encode('ascii') for entry in literal_eval(bwlist)]
diff --git a/invenio/testsuite/test_emergency_recipients.py b/invenio/testsuite/test_emergency_recipients.py
index 5ba0ecdbf..d0955fa39 100644
--- a/invenio/testsuite/test_emergency_recipients.py
+++ b/invenio/testsuite/test_emergency_recipients.py
@@ -1,84 +1,84 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" Test unit for the miscutil/errorlib module. """
__revision__ = "$Id$"
import datetime
from invenio.testsuite import make_test_suite, run_test_suite, InvenioTestCase
class TestGetEmergencyRecipients(InvenioTestCase):
    """Tests for get_emergency_recipients time-window matching."""

    def test_get_emergency_recipients(self):
        """errorlib - test return of proper set of recipients"""
        from invenio.ext.logging import get_emergency_recipients
        # All constraints are built relative to "now" so the test does
        # not depend on the date it runs on.
        now = datetime.datetime.today()
        tomorrow = now + datetime.timedelta(days=1)
        diff_day = now + datetime.timedelta(days=4)
        later = now.replace(hour=(now.hour + 1) % 24)
        earlier = now.replace(hour=(now.hour - 1) % 24)
        # Today's weekday + a two-hour window around now: should match.
        constraint_now = "%s %s-%s" % (
            now.strftime("%a"),
            earlier.strftime("%H:00"),
            later.strftime("%H:00"),
        )
        # Same window but tomorrow's weekday: should NOT match.
        constraint_tomorrow = "%s %s-%s" % (
            tomorrow.strftime("%a"),
            earlier.strftime("%H:00"),
            later.strftime("%H:00"),
        )
        # Time-only window containing now (no weekday): should match.
        constraint_time = "%s-%s" % (
            earlier.strftime("%H:00"),
            later.strftime("%H:00"),
        )
        minute = (now.minute - 3) % 60
        # hour and earlier can change when minute is modified
        if minute > now.minute:
            hour = (now.hour - 1) % 24
            earlier = now.replace(hour=(now.hour - 2) % 24)
        else:
            hour = now.hour
        # Window ending a few minutes before now: a near miss, no match.
        constraint_near_miss = "%s-%s" % (
            earlier.strftime("%H:00"),
            now.replace(minute=minute, hour=hour) \
               .strftime("%H:%M")
        )
        # Full weekday names: today's should match, a different day not.
        constraint_day = "%s" % now.strftime("%A")
        constraint_diff_day = "%s" % diff_day.strftime("%A")
        test_config = {
            constraint_now: 'now@example.com',
            constraint_tomorrow: 'tomorrow@example.com',
            constraint_time: 'time@example.com',
            constraint_day: 'day@example.com,day@foobar.com',
            constraint_diff_day: 'diff_day@example.com',
            constraint_near_miss: 'near_miss@example.com',
            '*': 'fallback@example.com',
        }
        result = get_emergency_recipients(recipient_cfg=test_config)
        # Expect only the matching windows plus the '*' fallback.
        expected = ['now@example.com', 'time@example.com',
                    'day@example.com,day@foobar.com', 'fallback@example.com']
        self.assertEqual(set(result), set(expected))
# Aggregate the test cases and allow running this module directly.
TEST_SUITE = make_test_suite(TestGetEmergencyRecipients,)

if __name__ == "__main__":
    run_test_suite(TEST_SUITE)
diff --git a/invenio/utils/autodiscovery/__init__.py b/invenio/utils/autodiscovery/__init__.py
index 2d8a6af6a..190bfaff5 100644
--- a/invenio/utils/autodiscovery/__init__.py
+++ b/invenio/utils/autodiscovery/__init__.py
@@ -1,125 +1,125 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Invenio import helper functions.
Usage example:
autodiscover_modules(['invenio'], '.+_tasks')
An import difference from pluginutils is that modules are imported in their
package hierarchy, contrary to pluginutils where modules are imported as
standalone Python modules.
"""
import imp
import re
from werkzeug import find_modules, import_string
from .checkers import create_enhanced_plugin_builder
_RACE_PROTECTION = False
def autodiscover_modules(packages, related_name_re='.+', ignore_exceptions=False):
    """
    Autodiscover function follows the pattern used by Celery.

    @param packages: List of package names to auto discover modules in.
    @type packages: list of str
    @param related_name_re: Regular expression used to match modules names.
    @type related_name_re: str
    @param ignore_exceptions: Ignore exception when importing modules.
    @type ignore_exceptions: bool
    """
    global _RACE_PROTECTION

    if _RACE_PROTECTION:
        return []
    _RACE_PROTECTION = True
    modules = []
    # try/finally replaces the previous duplicated flag reset that
    # emulated a finally-clause by hand.
    try:
        for pkg in packages:
            for module in find_related_modules(pkg, related_name_re,
                                               ignore_exceptions):
                if module is not None:
                    modules.append(module)
    finally:
        _RACE_PROTECTION = False
    return modules
def find_related_modules(package, related_name_re='.+', ignore_exceptions=False):
    """Return modules under ``package`` whose final name component
    matches ``related_name_re``; [] when ``package`` is not a package."""
    parts = package.rsplit(".", 1)
    try:
        if len(parts) == 2:
            parent = __import__(parts[0], globals(), locals(), [parts[1]])
            pkg = getattr(parent, parts[1])
        else:
            pkg = __import__(parts[0], globals(), locals(), [])
        # Plain modules have no __path__; AttributeError means "not a
        # package" and yields an empty result.
        pkg_path = pkg.__path__
    except AttributeError:
        return []

    matcher = re.compile(related_name_re)
    found = []
    for module_name in find_modules(package, include_packages=True):
        if matcher.match(module_name.split('.')[-1]):
            try:
                found.append(import_string(module_name,
                                           silent=ignore_exceptions))
            except Exception as exc:
                if not ignore_exceptions:
                    raise exc
    return found
def import_related_module(package, pkg_path, related_name, ignore_exceptions=False):
    """
    Import attribute ``related_name`` from ``package``.

    Returns None when no such module exists under ``pkg_path``.  Other
    import failures are re-raised, unless ``ignore_exceptions`` is set,
    in which case they are recorded via register_exception.
    """
    try:
        imp.find_module(related_name, pkg_path)
    except ImportError:
        return
    try:
        imported = __import__('%s' % (package), globals(), locals(),
                              [related_name])
        return getattr(imported, related_name)
    except Exception as error:
        if not ignore_exceptions:
            raise error
        #FIXME remove invenio dependency
        from invenio.ext.logging import register_exception
        register_exception()
diff --git a/invenio/utils/url.py b/invenio/utils/url.py
index 5c4b66ddd..6327495e3 100644
--- a/invenio/utils/url.py
+++ b/invenio/utils/url.py
@@ -1,853 +1,853 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
urlutils.py -- helper functions for URL related problems such as
argument washing, redirection, etc.
"""
__revision__ = "$Id$"
import time
import base64
import hmac
import re
import sys
import os
import inspect
import urllib
import urllib2
from urllib import urlencode, quote_plus, quote, FancyURLopener
from urlparse import urlparse, urlunparse
from cgi import parse_qs, parse_qsl, escape
from werkzeug import cached_property
from werkzeug.local import LocalProxy
try:
import BeautifulSoup
BEAUTIFUL_SOUP_IMPORTED = True
except ImportError:
BEAUTIFUL_SOUP_IMPORTED = False
from invenio.base.globals import cfg
from invenio.utils.hash import sha1, md5, HASHLIB_IMPORTED
from invenio.utils import apache
def wash_url_argument(var, new_type):
    """
    Coerce ``var`` into ``new_type``: 'list', 'str', 'int', 'tuple'
    or 'dict'.

    List inputs are washed through their first element.  Failed int
    conversions yield 0; an unrecognised ``new_type`` yields [].

    @param var: variable value
    @param new_type: 'list', 'str', 'int', 'tuple' or 'dict'
    @return: ``var`` coerced to ``new_type`` as far as possible
    """
    if new_type == 'list':
        return var if isinstance(var, list) else [var]

    if new_type == 'str':
        if isinstance(var, list):
            try:
                return "%s" % var[0]
            except:
                return ""
        if isinstance(var, str):
            return var
        return "%s" % var

    if new_type == 'int':
        if isinstance(var, list):
            try:
                return int(var[0])
            except:
                return 0
        if isinstance(var, (int, long)):
            return var
        if isinstance(var, str):
            try:
                return int(var)
            except:
                return 0
        return 0

    if new_type == 'tuple':
        return var if isinstance(var, tuple) else (var, )

    if new_type == 'dict':
        return var if isinstance(var, dict) else {0: var}

    # Unknown target type: mirror the original's empty-list fallback.
    return []
def redirect_to_url(req, url, redirection_type=None, norobot=False):
    """
    Redirect current page to url.
    @param req: request as received from apache
    @param url: url to redirect to
    @param redirection_type: what kind of redirection is required:
    e.g.: apache.HTTP_MULTIPLE_CHOICES             = 300
          apache.HTTP_MOVED_PERMANENTLY           = 301
          apache.HTTP_MOVED_TEMPORARILY           = 302
          apache.HTTP_SEE_OTHER                   = 303
          apache.HTTP_NOT_MODIFIED                = 304
          apache.HTTP_USE_PROXY                   = 305
          apache.HTTP_TEMPORARY_REDIRECT          = 307
    The default is apache.HTTP_MOVED_TEMPORARILY
    @param norobot: whether to instruct crawlers and robots such as GoogleBot
        not to index past this point.
    @see: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3
    """
    if redirection_type is None:
        redirection_type = apache.HTTP_MOVED_TEMPORARILY

    from flask import redirect
    r = redirect(url, code=redirection_type)
    raise apache.SERVER_RETURN, r

    # NOTE(review): everything below is unreachable — the raise above
    # always fires.  Kept as-is pending the FIXME below.
    #FIXME enable code below
    del req.headers_out["Cache-Control"]
    req.headers_out["Cache-Control"] = "no-cache, private, no-store, " \
        "must-revalidate, post-check=0, pre-check=0, max-age=0"
    req.headers_out["Pragma"] = "no-cache"

    if norobot:
        req.headers_out["X-Robots-Tag"] = "noarchive, nosnippet, noindex, nocache"

    user_agent = req.headers_in.get('User-Agent', '')
    if 'Microsoft Office Existence Discovery' in user_agent or 'ms-office' in user_agent:
        ## HACK: this is to workaround Microsoft Office trying to be smart
        ## when users click on URLs in Office documents that require
        ## authentication. Office will check the validity of the URL
        ## but will pass the browser the redirected URL rather than
        ## the original one. This is incompatible with e.g. Shibboleth
        ## based SSO since the referer would be lost.
        ## See: http://support.microsoft.com/kb/899927
        req.status = 200
        req.content_type = 'text/html'
        if req.method != 'HEAD':
            req.write("""
<html>
<head>
<title>Intermediate page for URLs clicked on MS Office Documents</title>
<meta http-equiv="REFRESH" content="5;url=%(url)s"></meta>
</head>
<body>
<p>You are going to be redirected to the desired content within 5 seconds. If the redirection does not happen automatically please click on <a href="%(url)s">%(url_ok)s</a>.</p>
</body>
</html>""" % {
                'url': escape(req.unparsed_uri, True),
                'url_ok': escape(req.unparsed_uri)
            })
        raise apache.SERVER_RETURN(apache.DONE)

    req.headers_out["Location"] = url

    if req.response_sent_p:
        raise IOError("Cannot redirect after headers have already been sent.")

    req.status = redirection_type
    req.write('<p>Please go to <a href="%s">here</a></p>\n' % url)

    raise apache.SERVER_RETURN, apache.DONE
def rewrite_to_secure_url(url, secure_base=None):
    """
    Rewrite ``url`` onto the secure (HTTPS) site.

    Only the scheme and network location are replaced by those of
    ``secure_base``; path, query and fragment are preserved.

    @param url: URL to be rewritten to a secure URL.
    @param secure_base: Base URL of secure site (defaults to
        CFG_SITE_SECURE_URL).
    """
    if secure_base is None:
        secure_base = cfg.get('CFG_SITE_SECURE_URL')
    secure_parts = urlparse(secure_base)
    parts = list(urlparse(url))
    parts[0] = secure_parts[0]   # scheme
    parts[1] = secure_parts[1]   # netloc
    return urlunparse(parts)
def get_referer(req, replace_ampersands=False):
    """ Return the referring page of a request.

    The referer is the URL of the previous page from which a link was
    followed, taken from the 'Referer' request header ('' when absent).

    @param req: request
    @param replace_ampersands: if 1, replace & by &amp; in url
                               (correct HTML cannot contain & characters alone)
    """
    try:
        referer = req.headers_in['Referer']
    except KeyError:
        return ''
    if replace_ampersands == 1:
        return referer.replace('&', '&amp;')
    return referer
def drop_default_urlargd(urlargd, default_urlargd):
    """Return a copy of ``urlargd`` without default-valued arguments.

    ``default_urlargd`` maps argument names to (type, default) tuples;
    entries of ``urlargd`` whose value equals the declared default are
    dropped, unknown names are kept.
    """
    defaults = {}
    defaults.update(default_urlargd)
    ## Commented out. An Invenio URL now should always specify the desired
    ## language, in order not to raise the automatic language discovery
    ## (client browser language can be used now in place of CFG_SITE_LANG)
    # defaults['ln'] = (str, CFG_SITE_LANG)

    canonical = dict(urlargd)
    for name, value in urlargd.items():
        try:
            if defaults[name][1] == value:
                del canonical[name]
        except KeyError:
            # No declared default for this argument: keep it.
            pass
    return canonical
def make_canonical_urlargd(urlargd, default_urlargd):
    """ Build the canonical query part of an URL.

    Arguments from ``urlargd`` whose value equals the default declared
    in ``default_urlargd`` (tuples of (type, default), as accepted by
    webinterface_handler.wash_urlargd) are discarded, so the simplest
    (canonical) query is produced.  The result starts with '?' when any
    argument remains, '' otherwise.
    """
    remaining = drop_default_urlargd(urlargd, default_urlargd)
    if not remaining:
        #FIXME double escaping of '&'? .replace('&', '&amp;')
        return ''
    return '?' + urlencode(remaining, doseq=True)
def create_html_link(urlbase, urlargd, link_label, linkattrd=None,
                     escape_urlargd=True, escape_linkattrd=True,
                     urlhash=None):
    """Creates a W3C compliant link.
    @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
    @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of'='hb'})
    @param link_label: text displayed in a browser (has to be already escaped)
    @param linkattrd: dictionary of attributes (e.g. a={'class': 'img'})
    @param escape_urlargd: boolean indicating if the function should escape
                           arguments (e.g. < becomes &lt; or " becomes &quot;)
    @param escape_linkattrd: boolean indicating if the function should escape
                             attributes (e.g. < becomes &lt; or " becomes &quot;)
    @param urlhash: hash string to add at the end of the link
    """
    href = create_url(urlbase, urlargd, escape_urlargd, urlhash)
    markup = '<a href="' + href + '"'
    if linkattrd:
        if escape_linkattrd:
            rendered = [escape(str(name), quote=True) + '="' +
                        escape(str(linkattrd[name]), quote=True) + '"'
                        for name in linkattrd.keys()]
        else:
            rendered = [str(name) + '="' + str(linkattrd[name]) + '"'
                        for name in linkattrd.keys()]
        # Attributes are separated from the href and each other by spaces.
        markup += ' ' + ' '.join(rendered)
    markup += '>' + str(link_label) + '</a>'
    return markup
def create_html_mailto(email, subject=None, body=None, cc=None, bcc=None,
                       link_label="%(email)s", linkattrd=None, escape_urlargd=True,
                       escape_linkattrd=True, email_obfuscation_mode=None):
    """Creates a W3C compliant 'mailto' link.

    Encode/encrypt given email to reduce undesired automated email
    harvesting when embedded in a web page.

    NOTE: there is no ultimate solution to protect against email
    harvesting. All have drawbacks and can more or less be
    circumvented. There are other techniques to protect email
    addresses. We implement the less annoying one for users.

    @param email: the recipient of the email
    @param subject: a default subject for the email (must not contain
                    line feeds)
    @param body: a default body for the email
    @param cc: the co-recipient(s) of the email
    @param bcc: the hidden co-recipient(s) of the email
    @param link_label: the label of this mailto link. String
                       replacement is performed on key %(email)s with
                       the email address if needed.
    @param linkattrd: dictionary of attributes (e.g. a={'class': 'img'})
    @param escape_urlargd: boolean indicating if the function should escape
                           arguments (e.g. < becomes &lt; or " becomes &quot;)
    @param escape_linkattrd: boolean indicating if the function should escape
                             attributes (e.g. < becomes &lt; or " becomes &quot;)
    @param email_obfuscation_mode: the protection mode. See below:

    You can choose among several modes to protect emails. It is
    advised to keep the default
    CFG_MISCUTIL_EMAIL_HARVESTING_PROTECTION value, so that it is
    possible for an admin to change the policy globally.

    Available modes ([t] means "transparent" for the user):

         -1: hide all emails, excepted CFG_SITE_ADMIN_EMAIL and
             CFG_SITE_SUPPORT_EMAIL.

    [t]   0: no protection, email returned as is.
             foo@example.com => foo@example.com

          1: basic email munging: replaces @ by [at] and . by [dot]
             foo@example.com => foo [at] example [dot] com

    [t]   2: transparent name mangling: characters are replaced by
             equivalent HTML entities.
             foo@example.com => &#102;&#111;&#111;&#64;&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109;

    [t]   3: javascript insertion. Requires Javascript enabled on client side.

          4: replaces @ and . characters by gif equivalents.
             foo@example.com => foo<img src="at.gif" alt=" [at] ">example<img src="dot.gif" alt=" [dot] ">com
    """
    # TODO: implement other protection modes to encode/encrypt email:
    #
    ## [t] 5 : form submission. User is redirected to a form that he can
    ##         fills in to send the email (??Use webmessage??).
    ##         Depending on WebAccess, ask to answer a question.
    ##
    ## [t] 6 : if user can see (controlled by WebAccess), display. Else
    ##         ask to login to see email. If user cannot see, display
    ##         form submission.
    if email_obfuscation_mode is None:
        # Fall back to the site-wide obfuscation policy.
        email_obfuscation_mode = cfg.get(
            'CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE')

    if linkattrd is None:
        linkattrd = {}

    # Collect the mailto: query parameters (subject, body, cc, bcc).
    parameters = {}
    if subject:
        parameters["subject"] = subject
    if body:
        # Normalise line endings to CRLF as expected in mailto bodies.
        parameters["body"] = body.replace('\r\n', '\n').replace('\n', '\r\n')
    if cc:
        parameters["cc"] = cc
    if bcc:
        parameters["bcc"] = bcc

    # Preprocessing values for some modes
    if email_obfuscation_mode == 1:
        # Basic Munging
        email = email.replace("@", " [at] ").replace(".", " [dot] ")
    elif email_obfuscation_mode == 2:
        # Transparent name mangling
        email = string_to_numeric_char_reference(email)

    if '%(email)s' in link_label:
        link_label = link_label % {'email': email}

    mailto_link = create_html_link('mailto:' + email, parameters,
                                   link_label, linkattrd,
                                   escape_urlargd, escape_linkattrd)

    if email_obfuscation_mode == 0:
        # Return "as is"
        return mailto_link
    elif email_obfuscation_mode == 1:
        # Basic Munging
        return mailto_link
    elif email_obfuscation_mode == 2:
        # Transparent name mangling
        return mailto_link
    elif email_obfuscation_mode == 3:
        # Javascript-based: emit the link reversed and let the browser
        # re-reverse it at render time, defeating naive harvesters.
        return '''<script language="JavaScript" ''' \
            '''type="text/javascript">''' \
            '''document.write('%s'.split("").reverse().join(""))''' \
            '''</script>''' % \
            mailto_link[::-1].replace("'", "\\'")
    elif email_obfuscation_mode == 4:
        # GIFs-based
        email = email.replace('.',
            '<img src="%s/img/dot.gif" alt=" [dot] " '
            'style="vertical-align:bottom" />' % cfg.get('CFG_SITE_URL'))
        email = email.replace('@',
            '<img src="%s/img/at.gif" alt=" [at] " '
            'style="vertical-align:baseline" />' % cfg.get('CFG_SITE_URL'))
        return email

    # All other cases, including mode -1:
    return ""
def string_to_numeric_char_reference(string):
    """
    Encode a string as HTML numeric character references.

    Eg: encode_html_entities("abc") == '&#97;&#98;&#99;'
    """
    return "".join("&#%d;" % ord(char) for char in string)
def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None):
    """
    Return ``(canonical_url, alternate_urls)`` for an Invenio URL.

    The canonical URL carries the scheme and host of CFG_SITE_URL and,
    when ``drop_ln`` is true, has its ``ln=`` argument removed.
    ``alternate_urls`` maps each language code in CFG_SITE_LANGS to the
    same URL with that ``ln=`` argument appended.
    """
    dummy_scheme, dummy_netloc, path, dummy_params, query, fragment = urlparse(url)
    site_scheme, site_netloc = urlparse(cfg.get('CFG_SITE_URL'))[0:2]
    query_items = washed_argd or parse_qsl(query)
    query_without_ln = [(name, value) for (name, value) in query_items
                        if name != 'ln']
    canonical_items = query_without_ln if drop_ln else query_items
    canonical_url = urlunparse((site_scheme, site_netloc, path,
                                dummy_params, urlencode(canonical_items),
                                fragment))
    alternate_urls = {}
    for lang in cfg.get('CFG_SITE_LANGS'):
        lang_query = urlencode(query_without_ln + [('ln', lang)])
        alternate_urls[lang] = urlunparse((site_scheme, site_netloc, path,
                                           dummy_params, lang_query,
                                           fragment))
    return canonical_url, alternate_urls
def create_url(urlbase, urlargd, escape_urlargd=True, urlhash=None):
    """Creates a W3C compliant URL. Output will look like this:
    'urlbase?param1=value1&amp;param2=value2'
    @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search)
    @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of'='hb'}
    @param escape_urlargd: boolean indicating if the function should escape
                           arguments (e.g. < becomes &lt; or " becomes &quot;)
    @param urlhash: hash string to add at the end of the link
    """
    url = urlbase
    if urlargd:
        if escape_urlargd:
            pairs = [escape(quote(str(name)), quote=True) + '=' +
                     escape(quote(str(urlargd[name])), quote=True)
                     for name in urlargd.keys()]
        else:
            pairs = [str(name) + '=' + str(urlargd[name])
                     for name in urlargd.keys()]
        # '&amp;' keeps the query string valid inside HTML attributes.
        url += '?' + '&amp;'.join(pairs)
    if urlhash:
        url += "#" + escape(quote(str(urlhash)))
    return url
def same_urls_p(a, b):
    """Compare two URLs, ignoring reorganizing of query arguments."""
    def _normalise(url):
        # Parse the URL and replace the raw query string (component 4)
        # with its parsed, order-insensitive dict form.
        components = list(urlparse(url))
        components[4] = parse_qs(components[4])
        return components
    return _normalise(a) == _normalise(b)
def urlargs_replace_text_in_arg(urlargs, regexp_argname, text_old, text_new):
    """Analyze `urlargs' (URL CGI GET query arguments in string form)
    and for each occurrence of argument matching `regexp_argname'
    replace every substring `text_old' by `text_new'. Return the
    resulting new URL.

    Used to be used for search engine's create_nearest_terms_box,
    now it is not used there anymore. It is left here in case it
    will become possibly useful later.
    """
    # Parse URL arguments into a dictionary of key -> list of values.
    argsdict = parse_qs(urlargs)
    # Rewrite values only for keys matching the regexp; keep others as-is.
    newdict = {}
    for argname in argsdict.keys():
        if re.match(regexp_argname, argname):
            newdict[argname] = [value.replace(text_old, text_new)
                                for value in argsdict[argname]]
        else:
            newdict[argname] = argsdict[argname]
    # Rebuild the query string, '&amp;'-separated for HTML embedding.
    pieces = []
    for argname in newdict.keys():
        for value in newdict[argname]:
            pieces.append("&amp;" + argname + "=" + quote_plus(value, ''))
    out = "".join(pieces)
    if out.startswith("&amp;"):
        out = out[5:]  # drop the leading separator (5 chars: '&amp;')
    return out
def get_title_of_page(url):
    """Fetch `url` and return its HTML <title>.

    @param url: page to get the title from
    @return: the page title in utf-8, None in case of any kind of
        exception (e.g. connection error, URL not known), or the string
        "Title not available" when BeautifulSoup is not importable
    """
    if not BEAUTIFUL_SOUP_IMPORTED:
        return "Title not available"
    try:
        page = make_invenio_opener('UrlUtils').open(url)
        return BeautifulSoup.BeautifulSoup(page).title.string.encode("utf-8")
    except:
        # Best effort: any failure (network, parsing, missing <title>)
        # yields None rather than an exception.
        return None
def make_user_agent_string(component=None):
    """
    Return a nice and uniform user-agent string to be used when Invenio
    act as a client in HTTP requests.
    """
    agent = "Invenio-%s (+%s; \"%s\")" % (cfg.get('CFG_VERSION'),
                                          cfg.get('CFG_SITE_URL'),
                                          cfg.get('CFG_SITE_NAME'))
    if component:
        agent += " %s" % component
    return agent
class InvenioFancyURLopener(FancyURLopener):
    """FancyURLopener that identifies itself with the Invenio user-agent
    string and never prompts interactively for credentials."""

    @cached_property
    def version(self):
        # FancyURLopener uses self.version to build its User-agent header;
        # computing it lazily (and caching) defers reading the site
        # configuration until the opener is actually used.
        # NOTE(review): assumes cached_property behaves as a descriptor on
        # this class hierarchy -- confirm with the FancyURLopener base.
        return make_user_agent_string()

    def prompt_user_passwd(self, host, realm):
        """Don't prompt: report no username/password available."""
        return None, None
## Let's override default useragent string
## See: http://docs.python.org/release/2.4.4/lib/module-urllib.html
# A LocalProxy defers construction: the InvenioFancyURLopener is only
# built when urllib actually accesses the opener, not at import time.
# NOTE(review): LocalProxy re-invokes the callable on each access --
# confirm a fresh opener per access is the intended behavior.
urllib._urlopener = LocalProxy(lambda: InvenioFancyURLopener())
def make_invenio_opener(component=None):
    """
    Return an urllib2 opener with the useragent already set in the appropriate
    way.
    """
    invenio_opener = urllib2.build_opener()
    invenio_opener.addheaders = [('User-agent',
                                  make_user_agent_string(component))]
    return invenio_opener
def create_AWS_request_url(base_url, argd, _amazon_secret_access_key,
                           _timestamp=None):
    """
    Create a signed AWS (Amazon Web Service) request URL corresponding
    to the given parameters.

    Example:
    >> create_AWS_request_url("http://ecs.amazon.com/onca/xml",
                              {'AWSAccessKeyID': '0000000000',
                               'Service': 'AWSECommerceService',
                               'Operation': 'ItemLookup',
                               'ItemID': '0679722769',
                               'ResponseGroup': 'ItemAttributes,Offers,Images,Review'},
                              "1234567890")

    @param base_url: Service URL of the Amazon store to query
    @param argd: dictionary of arguments defining the query.
        NOTE: argd is mutated in place (a 'Timestamp' key and, on success,
        a 'Signature' key are added), which is visible to the caller.
    @param _amazon_secret_access_key: your Amazon secret key
    @param _timestamp: for testing purpose only (default: current timestamp)
    @type base_url: string
    @type argd: dict
    @type _amazon_secret_access_key: string
    @type _timestamp: string
    @return signed URL of the request (string)
    """
    ## First define a few util functions

    def get_AWS_signature(argd, _amazon_secret_access_key,
                          method="GET", request_host="webservices.amazon.com",
                          request_uri="/onca/xml",
                          _timestamp=None):
        """
        Returns the signature of an Amazon request, based on the
        arguments of the request.

        @param argd: dictionary of arguments defining the query
        @param _amazon_secret_access_key: your Amazon secret key
        @param method: method of the request POST or GET
        @param request_host: host contacted for the query. To embed in the signature.
        @param request_uri: uri contacted at 'request_host'. To embed in the signature.
        @param _timestamp: for testing purpose only (default: current timestamp)
        @type argd: dict
        @type _amazon_secret_access_key: string
        @type method: string
        @type host_header: string
        @type http_request_uri: string
        @type _timestamp: string
        @return signature of the request (string)
        """
        # Add timestamp
        if not _timestamp:
            argd["Timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                              time.gmtime())
        else:
            argd["Timestamp"] = _timestamp

        # Order parameter keys by byte value (Python 2: keys() returns a
        # plain list that can be sorted in place)
        parameter_keys = argd.keys()
        parameter_keys.sort()

        # Encode arguments, according to RFC 3986. Make sure we
        # generate a list which is ordered by byte value of the keys
        arguments = [quote(str(key), safe="~/") + "=" + \
                     quote(str(argd[key]), safe="~/") \
                     for key in parameter_keys]

        # Join
        parameters_string = "&".join(arguments)

        # Prefix with the canonical request description (method, host,
        # URI), one element per line, as the signing scheme requires
        parameters_string = method.upper() + "\n" + \
                            request_host.lower() + "\n" + \
                            (request_uri or "/") + "\n" + \
                            parameters_string

        # Sign and return
        return calculate_RFC2104_HMAC(parameters_string,
                                      _amazon_secret_access_key)

    def calculate_RFC2104_HMAC(data, _amazon_secret_access_key):
        """
        Computes a RFC 2104 compliant HMAC Signature and then Base64
        encodes it.

        Module hashlib must be installed if Python < 2.5
        <http://pypi.python.org/pypi/hashlib/20081119>

        @param data: data to sign
        @param _amazon_secret_access_key: your Amazon secret key
        @type data: string
        @type _amazon_secret_access_key: string. Empty if hashlib module not installed
        """
        if not HASHLIB_IMPORTED:
            # Without hashlib we cannot sign: log the problem and return
            # an empty signature (the caller then skips adding it).
            try:
                raise Exception("Module hashlib not installed. Please install it.")
            except:
                from invenio.ext.logging import register_exception
                register_exception(stream='warning', alert_admin=True, subject='Cannot create AWS signature')
                return ""
        else:
            if sys.version_info < (2, 5):
                # compatibility mode for Python < 2.5 and hashlib
                my_digest_algo = _MySHA256(sha256())
            else:
                my_digest_algo = sha256
        # NOTE(review): base64.encodestring is deprecated on modern
        # Pythons (encodebytes); kept here for Python 2 compatibility.
        return base64.encodestring(hmac.new(_amazon_secret_access_key,
                                            data, my_digest_algo).digest()).strip()
    ## End util functions

    parsed_url = urlparse(base_url)
    signature = get_AWS_signature(argd, _amazon_secret_access_key,
                                  request_host=parsed_url[1],
                                  request_uri=parsed_url[2],
                                  _timestamp=_timestamp)
    if signature:
        argd["Signature"] = signature
    return base_url + "?" + urlencode(argd)
def create_Indico_request_url(base_url, indico_what, indico_loc, indico_id, indico_type, indico_params, indico_key, indico_sig, _timestamp=None):
    """
    Create a signed Indico request URL to access Indico HTTP Export APIs.

    See U{http://indico.cern.ch/ihelp/html/ExportAPI/index.html} for more
    information.

    Example:
    >> create_Indico_request_url("https://indico.cern.ch",
                                 "categ",
                                 "",
                                 [1, 7],
                                 "xml",
                                 {'onlypublic': 'yes',
                                  'order': 'title',
                                  'from': 'today',
                                  'to': 'tomorrow'},
                                 '00000000-0000-0000-0000-000000000000',
                                 '00000000-0000-0000-0000-000000000000')

    @param base_url: Service base URL of the Indico instance to query
    @param indico_what: element to export
    @type indico_what: one of the strings: C{categ}, C{event}, C{room}, C{reservation}
    @param indico_loc: location of the element(s) specified by ID (only used for some elements)
    @param indico_id: ID of the element to be exported
    @type indico_id: a string or a list/tuple of strings
    @param indico_type: output format
    @type indico_type: one of the strings: C{json}, C{jsonp}, C{xml}, C{html}, C{ics}, C{atom}
    @param indico_params: parameters of the query. See U{http://indico.cern.ch/ihelp/html/ExportAPI/common.html}
    @param indico_key: API key provided for the given Indico instance
    @param indico_sig: API secret key (signature) provided for the given Indico instance
    @param _timestamp: for testing purpose only (default: current timestamp)
    @return signed URL of the request (string)
    """
    # Build the export path: /export/<what>[/<loc>]/<id>.<type>
    url = '/export/' + indico_what + '/'
    if indico_loc:
        url += indico_loc + '/'
    if type(indico_id) in (list, tuple):
        # dash separated list of values
        indico_id = '-'.join([str(x) for x in indico_id])
    url += indico_id + '.' + str(indico_type)

    # Collect the query parameters as a list of (key, value) pairs.
    if hasattr(indico_params, 'items'):
        items = indico_params.items()
    else:
        items = list(indico_params)
    if indico_key:
        items.append(('apikey', indico_key))
    if indico_sig and HASHLIB_IMPORTED:
        if _timestamp:
            items.append(('timestamp', str(_timestamp)))
        else:
            items.append(('timestamp', str(int(time.time()))))
        # The signature is computed over the parameters sorted
        # case-insensitively by key, as the Indico API requires.
        items = sorted(items, key=lambda x: x[0].lower())
        url_to_sign = '%s?%s' % (url, urlencode(items))
        if sys.version_info < (2, 5):
            # compatibility mode for Python < 2.5 and hashlib
            my_digest_algo = _MySHA1(sha1())
        else:
            my_digest_algo = sha1
        signature = hmac.new(indico_sig, url_to_sign, my_digest_algo).hexdigest()
        items.append(('signature', signature))
    elif not HASHLIB_IMPORTED:
        # Cannot sign without hashlib: log a warning and continue with an
        # unsigned request.
        try:
            raise Exception("Module hashlib not installed. Please install it.")
        except:
            from invenio.ext.logging import register_exception
            # BUGFIX: the alert subject used to say "AWS" (copy-paste from
            # create_AWS_request_url); this function signs Indico requests.
            register_exception(stream='warning', alert_admin=True, subject='Cannot create Indico signature')
    if not items:
        return url
    url = '%s%s?%s' % (base_url.strip('/'), url, urlencode(items))
    return url
class _MyHashlibAlgo(object):
    '''
    Define a subclass of any hashlib algorithm class, with an additional "new()"
    function, to work with the Python < 2.5 version of the hmac module.

    (This class is more complex than it should be, but it is not
    possible to subclass a hashlib algorithm directly.)
    '''

    def __init__(self, obj):
        """Wrap obj, recording its method/member names for introspection."""
        # Call object.__setattr__ directly: our own __setattr__ (below)
        # consults self._obj, which does not exist yet at this point.
        super(_MyHashlibAlgo, self).__setattr__('_obj', obj)
        methods = []
        for name_value in inspect.getmembers(obj, inspect.ismethod):
            methods.append(name_value[0])
        super(_MyHashlibAlgo, self).__setattr__('__methods__', methods)

        def isnotmethod(object_):
            "Opposite of ismethod(..)"
            return not inspect.ismethod(object_)
        members = []
        for name_value in inspect.getmembers(obj, isnotmethod):
            members.append(name_value[0])
        super(_MyHashlibAlgo, self).__setattr__('__members__', members)

    def __getattr__(self, name):
        """Redirect unhandled get attribute to self._obj."""
        if not hasattr(self._obj, name):
            raise AttributeError, ("'%s' has no attribute %s" %
                                   (self.__class__.__name__, name))
        else:
            return getattr(self._obj, name)

    def __setattr__(self, name, value):
        """Redirect set attribute to self._obj if necessary."""
        # Does the wrapper itself already carry this attribute?
        self_has_attr = True
        try:
            super(_MyHashlibAlgo, self).__getattribute__(name)
        except AttributeError:
            self_has_attr = False

        if (name == "_obj" or not hasattr(self, "_obj") or
                not hasattr(self._obj, name) or self_has_attr):
            # Attribute belongs to the wrapper: set it on self.
            return super(_MyHashlibAlgo, self).__setattr__(name, value)
        else:
            # Otherwise forward the assignment to the wrapped object.
            return setattr(self._obj, name, value)
if HASHLIB_IMPORTED:
    from invenio.utils.hash import sha256

    class _MySHA256(_MyHashlibAlgo):
        "A _MyHashlibAlgo subclass for sha256"
        # NOTE(review): `d` is ignored, so a fresh empty digest is always
        # returned; this assumes the pre-2.5 hmac module only calls new()
        # without passing data -- confirm against that hmac implementation.
        new = lambda d = '': sha256()

    class _MySHA1(_MyHashlibAlgo):
        "A _MyHashlibAlgo subclass for sha1"
        new = lambda d = '': sha1()
def auto_version_url(file_path):
    """Append the MD5 of the file's content to the request URL so that
    browsers refresh their cache whenever the file changes.

    @param file_path: path to the file relative to CFG_WEBDIR, e.g. js/foo.js
    @return: file_path with the content hash appended as a query string;
        the hash is empty when the file cannot be read
    """
    file_md5 = ""
    try:
        # BUGFIX: use a context manager so the file handle is closed even
        # when reading fails (the previous open(...).read() leaked it).
        with open(cfg.get('CFG_WEBDIR') + os.sep + file_path) as versioned_file:
            file_md5 = md5(versioned_file.read()).hexdigest()
    except IOError:
        # Missing/unreadable file: fall back to an empty version tag.
        pass
    return file_path + "?%s" % file_md5
diff --git a/invenio_demosite/testsuite/regression/test_errorlib.py b/invenio_demosite/testsuite/regression/test_errorlib.py
index 07ebe936c..a2fbb17dd 100644
--- a/invenio_demosite/testsuite/regression/test_errorlib.py
+++ b/invenio_demosite/testsuite/regression/test_errorlib.py
@@ -1,121 +1,121 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""errorlib Regression Test Suite."""
__revision__ = "$Id$"
import os
import sys
-from invenio.errorlib import register_exception, get_pretty_traceback
+from invenio.ext.logging import register_exception, get_pretty_traceback
from invenio.config import CFG_SITE_URL, CFG_LOGDIR
from invenio.testsuite import make_test_suite, run_test_suite, \
test_web_page_content, merge_error_messages, \
InvenioTestCase
class ErrorlibWebPagesAvailabilityTest(InvenioTestCase):
"""Check errorlib web pages whether they are up or not."""
def test_your_baskets_pages_availability(self):
"""errorlib - availability of error sending pages"""
baseurl = CFG_SITE_URL + '/error/'
_exports = ['', 'send']
error_messages = []
for url in [baseurl + page for page in _exports]:
error_messages.extend(test_web_page_content(url))
if error_messages:
self.fail(merge_error_messages(error_messages))
return
class ErrorlibRegisterExceptionTest(InvenioTestCase):
"""Check errorlib register_exception functionality."""
def setUp(self):
from invenio.dbquery import run_sql
run_sql("DELETE FROM hstEXCEPTION")
def test_simple_register_exception(self):
"""errorlib - simple usage of register_exception"""
try:
raise Exception('test-exception')
except:
result = register_exception()
log_content = open(os.path.join(CFG_LOGDIR, 'invenio.err')).read()
self.failUnless('test_simple_register_exception' in log_content)
self.failUnless('test-exception' in log_content)
self.assertEqual(1, result, "register_exception have not returned 1")
def test_alert_admin_register_exception(self):
"""errorlib - alerting admin with register_exception"""
text = 'test-exception that you should receive by email'
try:
raise Exception(text)
except:
result = register_exception(alert_admin=True)
log_content = open(os.path.join(CFG_LOGDIR, 'invenio.err')).read()
self.failUnless('test_alert_admin_register_exception' in log_content)
self.failUnless(text in log_content)
self.assertEqual(1, result, "register_exception have not returned 1")
def test_password_hiding(self):
"""errorlib - hide password in frame analysis"""
try:
password = 'this password should not be visible'
int('foo')
except:
output = get_pretty_traceback(exc_info=sys.exc_info())
self.failIf(password in output, output)
self.failUnless('<*****>' in output, output)
def test_dbquery_password_hiding(self):
"""errorlib - hide dbquery password in frame analysis"""
from invenio.dbquery import connect
kwargs = {'host': 'foo', 'port': 999, 'db': 'baz', 'user': 'qoox', 'passwd': '123', 'use_unicode': False, 'charset': 'utf8'}
try:
connect(**kwargs)
except:
output = get_pretty_traceback(exc_info=sys.exc_info())
self.failIf('123' in output, output)
self.failUnless('<*****>' in output, output)
def test_nested_password_hiding(self):
"""errorlib - hide password nested in dictionary in frame analysis"""
try:
foo = {
'bar' : 'baz',
'qoox' : {
'blibpwdblob' : '1234'
}
}
int(foo)
except:
output = get_pretty_traceback(exc_info=sys.exc_info())
self.failIf('1234' in output, output)
self.failUnless('<*****>' in output, output)
TEST_SUITE = make_test_suite(ErrorlibWebPagesAvailabilityTest,
ErrorlibRegisterExceptionTest)
if __name__ == "__main__":
run_test_suite(TEST_SUITE, warn_user=True)
diff --git a/invenio_demosite/testsuite/regression/test_websubmit.py b/invenio_demosite/testsuite/regression/test_websubmit.py
index ae222b612..fd4e455d7 100644
--- a/invenio_demosite/testsuite/regression/test_websubmit.py
+++ b/invenio_demosite/testsuite/regression/test_websubmit.py
@@ -1,278 +1,278 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebSubmit Regression Test Suite."""
__revision__ = "$Id$"
import os
from logging import StreamHandler, DEBUG
from cStringIO import StringIO
-from invenio.errorlib import register_exception
+from invenio.ext.logging import register_exception
from invenio.config import CFG_SITE_URL, CFG_PREFIX, CFG_TMPDIR, CFG_PATH_PDFTK
from invenio.base.wrappers import lazy_import
from invenio.testsuite import make_test_suite, run_test_suite, \
test_web_page_content, merge_error_messages, \
InvenioTestCase
from invenio.base.factory import with_app_context
websubmit_file_stamper = lazy_import('invenio.websubmit_file_stamper')
class WebSubmitWebPagesAvailabilityTest(InvenioTestCase):
"""Check WebSubmit web pages whether they are up or not."""
def test_submission_pages_availability(self):
"""websubmit - availability of submission pages"""
baseurl = CFG_SITE_URL + '/submit/'
_exports = ['', 'direct']
error_messages = []
for url in [baseurl + page for page in _exports]:
error_messages.extend(test_web_page_content(url))
if error_messages:
self.fail(merge_error_messages(error_messages))
return
def test_publiline_pages_availability(self):
"""websubmit - availability of approval pages"""
baseurl = CFG_SITE_URL
_exports = ['/approve.py', '/publiline.py',
'/yourapprovals.py']
error_messages = []
for url in [baseurl + page for page in _exports]:
error_messages.extend(test_web_page_content(url))
if error_messages:
self.fail(merge_error_messages(error_messages))
return
def test_your_submissions_pages_availability(self):
"""websubmit - availability of Your Submissions pages"""
baseurl = CFG_SITE_URL
_exports = ['/yoursubmissions.py']
error_messages = []
for url in [baseurl + page for page in _exports]:
error_messages.extend(test_web_page_content(url))
if error_messages:
self.fail(merge_error_messages(error_messages))
return
def test_help_page_availability(self):
"""websubmit - availability of WebSubmit help page"""
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/help/submit-guide',
expected_text="Submit Guide"))
class WebSubmitLegacyURLsTest(InvenioTestCase):
""" Check that the application still responds to legacy URLs"""
def test_legacy_help_page_link(self):
"""websubmit - legacy Submit Guide page link"""
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/help/submit',
expected_text="Submit Guide"))
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/help/submit/',
expected_text="Submit Guide"))
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/help/submit/index.en.html',
expected_text="Submit Guide"))
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/help/submit/access.en.html',
expected_text="Submit Guide"))
class WebSubmitXSSVulnerabilityTest(InvenioTestCase):
"""Test possible XSS vulnerabilities of the submission engine."""
def test_xss_in_submission_doctype(self):
"""websubmit - no XSS vulnerability in doctype parameter"""
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/submit?doctype=%3CSCRIPT%3Ealert%28%22XSS%22%29%3B%3C%2FSCRIPT%3E',
expected_text='Unable to find document type: &lt;SCRIPT&gt;alert("XSS")', username="jekyll",
password="j123ekyll"))
def test_xss_in_submission_act(self):
"""websubmit - no XSS vulnerability in act parameter"""
self.assertEqual([],
test_web_page_content(CFG_SITE_URL + '/submit?doctype=DEMOTHE&access=1_1&act=%3CSCRIPT%3Ealert%28%22XSS%22%29%3B%3C%2FSCRIPT%3E',
expected_text='Invalid doctype and act parameters', username="jekyll",
password="j123ekyll"))
def test_xss_in_submission_page(self):
"""websubmit - no XSS vulnerability in access parameter"""
self.assertEqual([],
test_web_page_content(CFG_SITE_URL +
'/submit?doctype=DEMOTHE&access=/../../../etc/passwd&act=SBI&startPg=1&ln=en&ln=en', expected_text='Invalid parameters', username="jekyll",
password="j123ekyll"))
self.assertEqual([],
test_web_page_content(CFG_SITE_URL +
'/submit?doctype=DEMOTHE&access=%3CSCRIPT%3Ealert%28%22XSS%22%29%3B%3C%2FSCRIPT%3E&act=SBI', expected_text='Invalid parameters', username="jekyll",
password="j123ekyll"))
@with_app_context()
def WebSubmitFileConverterTestGenerator():
from invenio.websubmit_file_converter import get_conversion_map, can_convert
if can_convert('.odt', '.txt'):
## Special test for unoconv/LibreOffice
yield WebSubmitFileConverterTest(os.path.join(CFG_PREFIX, 'lib', 'webtest', 'invenio', 'test.odt'), '.odt', '.txt')
if can_convert('.doc', '.txt'):
## Special test for unoconv/LibreOffice
yield WebSubmitFileConverterTest(os.path.join(CFG_PREFIX, 'lib', 'webtest', 'invenio', 'test.doc'), '.doc', '.txt')
for from_format in get_conversion_map().keys():
input_file = os.path.join(CFG_PREFIX, 'lib', 'webtest', 'invenio', 'test%s' % from_format)
if not os.path.exists(input_file):
## Can't run such a test because there is no test example
continue
for to_format in get_conversion_map().keys():
if from_format == to_format:
continue
conversion_map = can_convert(from_format, to_format)
if conversion_map:
if [converter for converter in conversion_map if converter[0].__name__ == 'unoconv']:
## We don't want to test unoconv which is tested separately
continue
yield WebSubmitFileConverterTest(input_file, from_format, to_format)
class WebSubmitFileConverterTest(InvenioTestCase):
"""Test WebSubmit file converter tool"""
def __init__(self, input_file, from_format, to_format):
super(WebSubmitFileConverterTest, self).__init__('_run_test')
self.from_format = from_format
self.to_format = to_format
self.input_file = input_file
def setUp(self):
from invenio.websubmit_file_converter import get_file_converter_logger
logger = get_file_converter_logger()
self.log = StringIO()
logger.setLevel(DEBUG)
for handler in logger.handlers:
logger.removeHandler(handler)
handler = StreamHandler(self.log)
handler.setLevel(DEBUG)
logger.addHandler(handler)
def shortDescription(self):
return """websubmit - test %s to %s conversion""" % (self.from_format, self.to_format)
def _run_test(self):
from invenio.websubmit_file_converter import InvenioWebSubmitFileConverterError, convert_file
try:
tmpdir_snapshot1 = set(os.listdir(CFG_TMPDIR))
output_file = convert_file(self.input_file, output_format=self.to_format)
tmpdir_snapshot2 = set(os.listdir(CFG_TMPDIR))
tmpdir_snapshot2.discard(os.path.basename(output_file))
if not os.path.exists(output_file):
raise InvenioWebSubmitFileConverterError("output_file %s was not correctly created" % output_file)
if tmpdir_snapshot2 - tmpdir_snapshot1:
raise InvenioWebSubmitFileConverterError("Some temporary files were left over: %s" % (tmpdir_snapshot2 - tmpdir_snapshot1))
except Exception, err:
register_exception(alert_admin=True)
self.fail("ERROR: when converting from %s to %s: %s, the log contained: %s" % (self.from_format, self.to_format, err, self.log.getvalue()))
if CFG_PATH_PDFTK:
class WebSubmitStampingTest(InvenioTestCase):
"""Test WebSubmit file stamping tool"""
def test_stamp_coverpage(self):
"""websubmit - creation of a PDF cover page stamp (APIs)"""
file_stamper_options = { 'latex-template' : "demo-stamp-left.tex",
'latex-template-var' : {'REPORTNUMBER':'TEST-2010','DATE':'10/10/2000'},
'input-file' : CFG_PREFIX + "/lib/webtest/invenio/test.pdf",
'output-file' : "test-stamp-coverpage.pdf",
'stamp' : "coverpage",
'layer' : "foreground",
'verbosity' : 0,
}
try:
(stamped_file_path_only, stamped_file_name) = \
websubmit_file_stamper.stamp_file(file_stamper_options)
except:
self.fail("Stamping failed")
# Test that file is now bigger...
assert os.path.getsize(os.path.join(stamped_file_path_only,
stamped_file_name)) > 12695
def test_stamp_firstpage(self):
"""websubmit - stamping first page of a PDF (APIs)"""
file_stamper_options = { 'latex-template' : "demo-stamp-left.tex",
'latex-template-var' : {'REPORTNUMBER':'TEST-2010','DATE':'10/10/2000'},
'input-file' : CFG_PREFIX + "/lib/webtest/invenio/test.pdf",
'output-file' : "test-stamp-firstpage.pdf",
'stamp' : "first",
'layer' : "background",
'verbosity' : 0,
}
try:
(stamped_file_path_only, stamped_file_name) = \
websubmit_file_stamper.stamp_file(file_stamper_options)
except:
self.fail("Stamping failed")
# Test that file is now bigger...
assert os.path.getsize(os.path.join(stamped_file_path_only,
stamped_file_name)) > 12695
def test_stamp_allpages(self):
"""websubmit - stamping all pages of a PDF (APIs)"""
file_stamper_options = { 'latex-template' : "demo-stamp-left.tex",
'latex-template-var' : {'REPORTNUMBER':'TEST-2010','DATE':'10/10/2000'},
'input-file' : CFG_PREFIX + "/lib/webtest/invenio/test.pdf",
'output-file' : "test-stamp-allpages.pdf",
'stamp' : "all",
'layer' : "foreground",
'verbosity' : 0,
}
try:
(stamped_file_path_only, stamped_file_name) = \
websubmit_file_stamper.stamp_file(file_stamper_options)
except:
self.fail("Stamping failed")
# Test that file is now bigger...
assert os.path.getsize(os.path.join(stamped_file_path_only,
stamped_file_name)) > 12695
else:
## pdftk is not available. Disabling stamping-related
## regression tests.
class WebSubmitStampingTest(InvenioTestCase):
pass
TEST_SUITE = make_test_suite(WebSubmitWebPagesAvailabilityTest,
WebSubmitLegacyURLsTest,
WebSubmitXSSVulnerabilityTest,
WebSubmitStampingTest)
for test in WebSubmitFileConverterTestGenerator():
TEST_SUITE.addTest(test)
if __name__ == "__main__":
run_test_suite(TEST_SUITE, warn_user=True)
diff --git a/modules/miscutil/lib/Makefile.am b/modules/miscutil/lib/Makefile.am
index 030105d96..32ea2ecbd 100644
--- a/modules/miscutil/lib/Makefile.am
+++ b/modules/miscutil/lib/Makefile.am
@@ -1,135 +1,134 @@
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
pylibdir = $(libdir)/python/invenio
pylib_DATA = __init__.py \
crossrefutils.py \
data_cacher.py \
dataciteutils.py \
dataciteutils_tester.py \
dbdump.py \
dbquery.py \
dbquery_regression_tests.py \
dbquery_unit_tests.py \
- errorlib.py \
errorlib_regression_tests.py \
errorlib_unit_tests.py \
errorlib_webinterface.py \
flaskshell.py \
htmlutils.py \
htmlutils_unit_tests.py \
intbitset_helper.py \
intbitset_unit_tests.py \
invenio_connector.py \
invenio_connector_regression_tests.py \
inveniocfg_unit_tests.py \
inveniomanage_unit_tests.py \
jsonutils_unit_tests.py \
logicutils_unit_tests.py \
memoiseutils_unit_tests.py \
messages_unit_tests.py \
miscutil_model.py \
orcid.py \
paginationutils_unit_tests.py \
pidutils.py \
pidutils_unit_tests.py \
plotextractor.py \
plotextractor_config.py \
plotextractor_converter.py \
plotextractor_getter.py \
plotextractor_output_utils.py \
plotextractor_regression_tests.py \
plotextractor_unit_tests.py \
pluginutils.py \
pluginutils_unit_tests.py \
remote_debugger.py \
remote_debugger_config.py \
remote_debugger_wsgi_reload.py \
sequtils.py \
sequtils_cnum.py \
sequtils_regression_tests.py \
shellutils.py \
shellutils_unit_tests.py \
sherpa_romeo.py \
sherpa_romeo_testing.py \
solrutils_bibindex_indexer.py \
solrutils_bibindex_searcher.py \
solrutils_bibrank_indexer.py \
solrutils_bibrank_searcher.py \
solrutils_config.py \
solrutils_regression_tests.py \
solrutils_unit_tests.py \
testutils_regression_tests.py \
textutils_unit_tests.py \
urlutils_unit_tests.py \
w3c_validator.py \
web_api_key_unit_tests.py \
xapianutils_bibindex_indexer.py \
xapianutils_bibindex_searcher.py \
xapianutils_bibrank_indexer.py \
xapianutils_bibrank_searcher.py \
xapianutils_config.py \
xmlDict.py
noinst_DATA = testimport.py \
kwalitee.py \
pep8.py
tmpdir = $(prefix)/var/tmp
tmp_DATA = intbitset_example.int
EXTRA_DIST = $(pylib_DATA) \
$(tmp_DATA) \
testimport.py \
kwalitee.py \
pep8.py \
intbitset.pyx \
intbitset.c \
intbitset.h \
intbitset_impl.c \
intbitset_setup.py \
intbitset.pyx \
solrutils \
solrutils/schema.xml \
solrutils/java_sources.txt \
solrutils/org \
solrutils/org/invenio_software \
solrutils/org/invenio_software/solr \
solrutils/org/invenio_software/solr/BitSetFieldCollector.java \
solrutils/org/invenio_software/solr/InvenioFacetComponent.java \
solrutils/org/invenio_software/solr/FieldCollectorBase.java \
solrutils/org/invenio_software/solr/IntFieldCollector.java \
solrutils/org/invenio_software/solr/FieldCollector.java \
solrutils/org/invenio_software/solr/InvenioQueryComponent.java \
solrutils/org/invenio_software/solr/InvenioBitsetStreamResponseWriter.java \
solrutils/org/invenio_software/solr/InvenioBitSet.java \
solrutils/org/invenio_software/solr/StringFieldCollector.java \
solrutils/solrconfig.xml
all:
$(PYTHON) $(srcdir)/intbitset_setup.py build_ext
install-data-hook:
$(PYTHON) $(srcdir)/testimport.py ${prefix}
@find ${srcdir} -name intbitset.so -exec cp {} ${pylibdir} \;
CLEANFILES = *~ *.tmp *.pyc
clean-local:
rm -rf build
diff --git a/modules/miscutil/lib/pluginutils.py b/modules/miscutil/lib/pluginutils.py
index 890c5920a..ad3d06b22 100644
--- a/modules/miscutil/lib/pluginutils.py
+++ b/modules/miscutil/lib/pluginutils.py
@@ -1,523 +1,523 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
This module implements a generic plugin container facility.
"""
import sys
import os
import glob
import inspect
import imp
class PluginContainer(object):
    """
    This class implements a I{plugin container}.

    Part of the dict interface is implemented, with the restriction that
    only correctly enabled plugins can be retrieved by their plugin_name.

    >>> ## Loading all the plugins within a directory.
    >>> websubmit_functions = PluginContainer(
    ...     os.path.join(CFG_PYLIBDIR,
    ...         'invenio', 'websubmit_functions', '*.py')
    ... )
    >>> ## Loading an explicit plugin.
    >>> case_eds = websubmit_functions['CaseEDS']

    @param plugin_pathnames: zero or more plugins_pathnames from where to load
        the plugins.
    @type plugin_pathnames: string/list
    @param plugin_builder: a callable with the signature
        C{plugin_builder(plugin_name, plugin_code)} that will be called
        to extract the actual plugin from the module stored in plugin_code.
    @type plugin_builder: callable
    @param api_version: the API version of the plugin. If specified, plugins
        which specify different versions will fail to be loaded. Default value
        is C{None} which turns off the version checking.
    @type api_version: integer
    @param plugin_signature: a stub to be used in order to check if a loaded
        plugin respects a particular signature or not.
    @type plugin_signature: class/function
    @param external: are the plugins loaded from outside the Invenio standard
        lib directory? Defaults to False.
    @type external: bool
    @param exception_registration: should exceptions be registered when
        loading plugins? Defaults to True.
    @type exception_registration: bool

    @ivar _plugin_map: a map between plugin_name and a dict with keys
        "error", "plugin", "plugin_path", "enabled", "api_version"
    @type _plugin_map: dict
    @ivar _plugin_pathnames: the list of normalized plugin pathnames
        corresponding to the plugins to be loaded.
    @type _plugin_pathnames: list
    @ivar _plugin_builder: the plugin builder as passed to the constructor.
    @type _plugin_builder: function
    @ivar api_version: the version as provided to the constructor.
    @type api_version: integer
    @ivar _external: are the plugins loaded from outside the Invenio standard
        lib directory? Defaults to False.
    @type _external: bool
    @ivar _register_exception: should exceptions be registered when loading
        plugins? Defaults to True.
    @type _register_exception: bool

    @group Mapping interface: __contains__,__getitem__,get,has_key,items,
        iteritems,iterkeys,itervalues,keys,values,__len__
    @group Main API: __init__,add_plugin_pathnames,get_enabled_plugins,
        get_broken_plugins,get_plugin,reload_plugins
    """

    def __init__(self,
                 plugin_pathnames=None,
                 plugin_builder=None,
                 api_version=None,
                 plugin_signature=None,
                 external=False,
                 exception_registration=True):
        ## plugin_name -> {"plugin", "error", "plugin_path", "enabled",
        ## "api_version"}
        self._plugin_map = {}
        ## Glob patterns from which plugins are discovered (older patterns
        ## first, so later ones may override -- see reload_plugins).
        self._plugin_pathnames = []
        self._external = external
        self.api_version = api_version
        self._register_exception = exception_registration
        if plugin_builder is None:
            self._plugin_builder = self.default_plugin_builder
        else:
            self._plugin_builder = plugin_builder
        self._plugin_signature = plugin_signature
        if plugin_pathnames:
            self.add_plugin_pathnames(plugin_pathnames)

    @staticmethod
    def default_plugin_builder(plugin_name, plugin_code):
        """
        Default plugin builder used to extract the plugin from the module
        that contains it.

        @note: By default it will look for a class or function with the same
            name of the plugin.
        @param plugin_name: the name of the plugin.
        @type plugin_name: string
        @param plugin_code: the code of the module as just read from
            filesystem.
        @type plugin_code: module
        @return: the plugin
        """
        return getattr(plugin_code, plugin_name)

    def add_plugin_pathnames(self, plugin_pathnames):
        """
        Add one or more plugin pathnames, i.e. full plugin paths exploiting
        wildcards, e.g. "bibformat_elements/bfe_*.py".

        @note: these plugins_pathnames will be added to the current list of
            plugin_pathnames, and all the plugins will be reloaded.
        @param plugin_pathnames: one or more plugins_pathnames
        @type plugin_pathnames: string/list
        """
        if type(plugin_pathnames) is unicode:
            plugin_pathnames = str(plugin_pathnames)
        if type(plugin_pathnames) is str:
            self._plugin_pathnames.append(plugin_pathnames)
        else:
            self._plugin_pathnames.extend(plugin_pathnames)
        self.reload_plugins()

    def enable_plugin(self, plugin_name):
        """
        Enable plugin_name.

        @param plugin_name: the plugin name.
        @type plugin_name: string
        @raise KeyError: if the plugin does not exist.
        """
        self._plugin_map[plugin_name]['enabled'] = True

    def disable_plugin(self, plugin_name):
        """
        Disable plugin_name.

        @param plugin_name: the plugin name.
        @type plugin_name: string
        @raise KeyError: if the plugin does not exist.
        """
        self._plugin_map[plugin_name]['enabled'] = False

    def plugin_enabled_p(self, plugin_name):
        """
        Return True if the plugin is correctly enabled.

        @param plugin_name: the plugin name.
        @type plugin_name: string
        @return: True if the plugin is correctly enabled.
        @rtype: bool
        @raise KeyError: if the plugin does not exist.
        """
        return self._plugin_map[plugin_name]['enabled']

    def get_plugin_filesystem_path(self, plugin_name):
        """
        Return the filesystem path from where the plugin was loaded.

        @param plugin_name: the plugin name.
        @type plugin_name: string
        @return: the filesystem path.
        @rtype: string
        @raise KeyError: if the plugin does not exist.
        """
        return self._plugin_map[plugin_name]['plugin_path']

    def get_plugin(self, plugin_name):
        """
        Return the plugin corresponding to plugin_name.

        @param plugin_name: the plugin name.
        @type plugin_name: string
        @return: the plugin
        @raise KeyError: if the plugin does not exist or is not enabled.
        """
        if self._plugin_map[plugin_name]['enabled']:
            return self._plugin_map[plugin_name]['plugin']
        else:
            raise KeyError('"%s" is not enabled' % plugin_name)

    def get_broken_plugins(self):
        """
        Return a map between plugin names and errors, in the form of
        C{sys.exc_info} structures.

        @return: plugin_name -> sys.exc_info().
        @rtype: dict
        """
        ret = {}
        for plugin_name, plugin in self._plugin_map.items():
            if plugin['error']:
                ret[plugin_name] = plugin['error']
        return ret

    def reload_plugins(self, reload=False):
        """
        Load (or re-load) every plugin found by iterating over the
        registered plugin_pathnames.

        @note: if a plugin has the same plugin_name of an already loaded
            plugin, the former will override the latter (provided that the
            former had a compatible signature to the latter).
        @note: any plugin that fails to load will be added to the plugin
            map as disabled and the sys.exc_info() captured during the
            Exception will be stored. (if the failed plugin was supposed to
            override an existing one, the latter will be overridden by
            the failed former).
        @param reload: when True, force re-importing modules that are
            already present in C{sys.modules}.  The name shadows the
            C{reload} builtin but is kept for backwards compatibility:
            previously, reload_plugins would not reload a module due to
            a bug.
        @type reload: bool
        """
        for plugin_path in self._plugin_pathnames_iterator():
            self._load_plugin(plugin_path, reload=reload)

    def normalize_plugin_path(self, plugin_path):
        """
        Return a normalized (absolute) plugin_path.

        @param plugin_path: the plugin path.
        @type plugin_path: string
        @return: the normalized plugin path.
        @rtype: string
        @raise ValueError: if the path is not under CFG_PYLIBDIR/invenio
            (unless the container was built with external=True).
        """
        from invenio.config import CFG_PYLIBDIR
        invenio_path = os.path.abspath(os.path.join(CFG_PYLIBDIR, 'invenio'))
        plugin_path = os.path.abspath(plugin_path)
        if not self._external and not plugin_path.startswith(invenio_path):
            raise ValueError('A plugin should be stored under "%s" ("%s" was'
                             ' specified)' % (invenio_path, plugin_path))
        return plugin_path

    def _plugin_pathnames_iterator(self):
        """
        Return an iterator over all the normalized plugin paths.

        @note: older plugin_pathnames are considered first, and newer
            plugin_pathnames later, so that plugin overriding is possible.
        @return: the iterator over plugin paths.
        @rtype: iterator
        """
        for plugin_pathname in self._plugin_pathnames:
            for plugin_path in glob.glob(plugin_pathname):
                yield self.normalize_plugin_path(plugin_path)

    @staticmethod
    def get_plugin_name(plugin_path):
        """
        Return the name of the plugin after the plugin_path.

        @param plugin_path: the filesystem path to the plugin code.
        @type plugin_path: string
        @return: the plugin name (basename without the ".py" extension).
        @rtype: string
        """
        plugin_name = os.path.basename(plugin_path)
        if plugin_name.endswith('.py'):
            plugin_name = plugin_name[:-len('.py')]
        return plugin_name

    def _load_plugin(self, plugin_path, reload=False):
        """
        Load a plugin into the plugin map.

        @note: if the plugin_name calculated from plugin_path corresponds to
            an already existing plugin, the old plugin will be overridden and
            if the old plugin was correctly loaded but disabled also the
            new plugin will be disabled.
        @param plugin_path: the plugin path.
        @type plugin_path: string
        @param reload: when True, re-import the module even if it is already
            present in C{sys.modules}.
        @type reload: bool
        """
        ## Defined before the try block so the except clause can always
        ## record it, even if reading the module version failed.
        api_version = None
        try:
            plugin_name = self.get_plugin_name(plugin_path)
            ## Reuse the module if it is already imported from the very
            ## same file (compare paths without extension so .py/.pyc match).
            plugin = None
            if plugin_name in sys.modules:
                mod = sys.modules[plugin_name]
                if os.path.splitext(mod.__file__)[0] == \
                        os.path.splitext(plugin_path)[0]:
                    plugin = mod
            if not plugin or reload:
                ## Let's load the plugin module.
                plugin_fp, plugin_path, plugin_desc = imp.find_module(
                    plugin_name, [os.path.dirname(plugin_path)]
                )
                try:
                    plugin = imp.load_module(
                        plugin_name, plugin_fp, plugin_path, plugin_desc
                    )
                finally:
                    if plugin_fp:
                        plugin_fp.close()
            ## Let's check for API version.
            api_version = getattr(plugin, '__plugin_version__', None)
            if self.api_version and api_version != self.api_version:
                raise InvenioPluginContainerError("Plugin version mismatch."
                    " Expected %s, found %s" % (self.api_version, api_version))
            ## Let's load the actual plugin
            plugin = self._plugin_builder(plugin_name, plugin)
            ## Are we overriding an already loaded plugin?
            enabled = True
            if plugin_name in self._plugin_map:
                old_plugin = self._plugin_map[plugin_name]
                if old_plugin['error'] is None:
                    ## Keep the previous enabled/disabled state and make
                    ## sure the new plugin is signature-compatible with
                    ## the one it replaces.
                    enabled = old_plugin['enabled']
                    check_signature(plugin_name, old_plugin['plugin'], plugin)
            ## Let's check the plugin signature.
            if self._plugin_signature:
                check_signature(plugin_name, self._plugin_signature, plugin)
            self._plugin_map[plugin_name] = {
                'plugin': plugin,
                'error': None,
                'plugin_path': plugin_path,
                'enabled': enabled,
                'api_version': api_version,
            }
        except Exception:
            if self._register_exception:
                ## NOTE(review): import path resolved from the diff hunk
                ## renaming invenio.errorlib to invenio.ext.logging.
                from invenio.ext.logging import register_exception
                register_exception()
            ## Record the failure: the plugin is stored as broken and
            ## disabled, overriding any previously working version.
            self._plugin_map[plugin_name] = {
                'plugin': None,
                'error': sys.exc_info(),
                'plugin_path': plugin_path,
                'enabled': False,
                'api_version': api_version,
            }

    def __getitem__(self, plugin_name):
        """
        As in C{dict.__getitem__} but apply plugin name normalization and
        check if the plugin is correctly enabled.

        @param plugin_name: the name of the plugin
        @type plugin_name: string
        @return: the plugin.
        @raise KeyError: if the corresponding plugin is not enabled or there
            were some errors.
        """
        plugin_name = self.get_plugin_name(plugin_name)
        if plugin_name in self._plugin_map and \
                self._plugin_map[plugin_name]['enabled'] is True:
            return self._plugin_map[plugin_name]['plugin']
        else:
            raise KeyError('"%s" does not exists or is not correctly enabled' %
                plugin_name)

    def __contains__(self, plugin_name):
        """
        As in C{dict.__contains__} but apply plugin name normalization and
        check if the plugin is correctly enabled.

        @param plugin_name: the name of the plugin
        @type plugin_name: string
        @return: True if plugin_name is correctly there.
        @rtype: bool
        """
        plugin_name = self.get_plugin_name(plugin_name)
        return plugin_name in self._plugin_map and \
            self._plugin_map[plugin_name]['enabled'] is True

    def __len__(self):
        """
        As in C{dict.__len__} but consider only correctly enabled plugins.

        @return: the total number of plugins correctly enabled.
        @rtype: integer
        """
        count = 0
        for plugin in self._plugin_map.values():
            if plugin['enabled']:
                count += 1
        return count

    def get(self, plugin_name, default=None):
        """
        As in C{dict.get} but consider only correctly enabled plugins.

        @param plugin_name: the name of the plugin
        @type plugin_name: string
        @param default: the default value to return if plugin_name does not
            correspond to a correctly enabled plugin.
        @return: the plugin corresponding to plugin_name, or C{default}.
        """
        try:
            return self.__getitem__(plugin_name)
        except KeyError:
            return default

    def has_key(self, plugin_name):
        """
        As in C{dict.has_key} but apply plugin name normalization and check
        if the plugin is correctly enabled.

        @param plugin_name: the name of the plugin
        @type plugin_name: string
        @return: True if plugin_name is correctly there.
        @rtype: bool
        """
        return self.__contains__(plugin_name)

    def items(self):
        """
        As in C{dict.items} but checks if the plugins are correctly enabled.

        @return: list of (plugin_name, plugin).
        @rtype: [(plugin_name, plugin), ...]
        """
        ret = []
        for plugin_name, plugin in self._plugin_map.items():
            if plugin['enabled']:
                ret.append((plugin_name, plugin['plugin']))
        return ret

    def iteritems(self):
        """
        As in C{dict.iteritems} but checks if the plugins are correctly
        enabled.

        @return: an iterator over the (plugin_name, plugin) items.
        """
        for plugin_name, plugin in self._plugin_map.items():
            if plugin['enabled']:
                yield (plugin_name, plugin['plugin'])

    def iterkeys(self):
        """
        As in C{dict.iterkeys} but checks if the plugins are correctly
        enabled.

        @return: an iterator over the plugin_names.
        """
        for plugin_name, plugin in self._plugin_map.items():
            if plugin['enabled']:
                yield plugin_name

    __iter__ = iterkeys

    def itervalues(self):
        """
        As in C{dict.itervalues} but checks if the plugins are correctly
        enabled.

        @return: an iterator over the plugins.
        """
        for plugin in self._plugin_map.values():
            if plugin['enabled']:
                yield plugin['plugin']

    def keys(self):
        """
        As in C{dict.keys} but checks if the plugins are correctly enabled.

        @return: the list of enabled plugin_names.
        @rtype: list of strings
        """
        ret = []
        for plugin_name, plugin in self._plugin_map.items():
            if plugin['enabled']:
                ret.append(plugin_name)
        return ret

    def values(self):
        """
        As in C{dict.values} but checks if the plugins are correctly enabled.

        @return: the list of enabled plugin codes.
        """
        return [plugin['plugin'] \
            for plugin in self._plugin_map.values() if plugin['enabled']]

    def get_enabled_plugins(self):
        """
        Return a map of the correctly enabled plugins.

        @return: a map plugin_name -> plugin
        @rtype: dict
        """
        ret = {}
        for plugin_name, plugin in self._plugin_map.items():
            if plugin['enabled']:
                ret[plugin_name] = plugin['plugin']
        return ret

Event Timeline