diff --git a/.gitignore b/.gitignore
index 7d7f3e181..ced0edc28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,98 +1,99 @@
.version
Makefile
Makefile.in
compile
configure
config.cache
config.log
config.status
config.nice
config.guess
config.sub
install-sh
missing
autom4te.cache
aclocal.m4
TAGS
invenio-autotools.conf
po/POTFILES
po/POTFILES-py
po/POTFILES-webdoc
po/stamp-po
po/*.gmo
po/*.mo
po/*.sed
*~
*.pyc
*.clisp.mem
*.cmucl.core
*.sbcl.core
*.fas
*.fasl
*.sse2f
*.lib
*.x86f
modules/webauthorprofile/bin/webauthorprofile
modules/bibauthorid/bin/bibauthorid
modules/bibcirculation/bin/bibcircd
modules/bibclassify/bin/bibclassify
modules/bibconvert/bin/bibconvert
modules/bibdocfile/bin/bibdocfile
modules/bibedit/bin/bibedit
modules/bibrecord/bin/textmarc2xmlmarc
modules/bibrecord/bin/xmlmarc2textmarc
modules/bibrecord/bin/xmlmarclint
modules/docextract/bin/refextract
modules/docextract/bin/docextract
modules/bibencode/bin/bibencode
modules/bibexport/bin/bibexport
modules/bibformat/bin/bibreformat
modules/oaiharvest/bin/oaiharvest
modules/oairepository/bin/oairepositoryupdater
modules/bibindex/bin/bibindex
modules/bibindex/bin/bibstat
modules/bibmatch/bin/bibmatch
modules/bibrank/bin/bibrank
modules/bibrank/bin/bibrankgkb
modules/bibrank/etc/bibrankgkb.cfg
modules/bibrank/etc/demo_jif.cfg
modules/bibrank/etc/template_single_tag_rank_method.cfg
modules/bibsched/bin/bibsched
modules/bibsched/bin/bibtaskex
modules/bibsched/bin/bibtasklet
modules/bibsort/bin/bibsort
modules/bibsword/bin/bibsword
modules/bibupload/bin/batchuploader
modules/bibupload/bin/bibupload
modules/elmsubmit/bin/elmsubmit
modules/elmsubmit/etc/elmsubmit.cfg
modules/miscutil/bin/dbdump
modules/miscutil/bin/dbexec
modules/miscutil/bin/inveniocfg
modules/miscutil/bin/inveniomanage
modules/miscutil/bin/plotextractor
modules/miscutil/etc/bash_completion.d/inveniocfg
modules/miscutil/lib/build
modules/webaccess/bin/authaction
modules/webaccess/bin/webaccessadmin
modules/webalert/bin/alertengine
modules/webmessage/bin/webmessageadmin
modules/websearch/bin/webcoll
modules/websession/bin/inveniogc
modules/webstat/bin/webstat
modules/webstat/bin/webstatadmin
modules/webstyle/bin/gotoadmin
modules/webstyle/bin/webdoc
modules/websubmit/bin/bibdocfile
modules/websubmit/bin/inveniounoconv
+modules/websubmit/bin/websubmitadmin
modules/bibcirculation/bin/bibcircd
tags
config.status.lineno
configure.lineno
*.kdevelop
*.kdevses
.project
.noseids
.settings
.pydevproject
org.eclipse.core.resources.prefs
diff --git a/AUTHORS b/AUTHORS
index 9efae2f01..fe3125b4f 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,383 +1,387 @@
Invenio AUTHORS
===============
Invenio is being co-developed by an international collaboration
comprising institutes such as CERN, CfA, DESY, EPFL, FNAL, SLAC.
The CERN development team currently consists of:
- Jean-Yves Le Meur
CERN Digital Library Services team leader.
- Tibor Simko
CERN Digital Library Technology team leader. Head Developer of
Invenio. General system architecture, release management.
WebSearch, BibIndex, BibSched, WebStat, WebStyle, WebSession,
WebHelp, and more.
- Jerome Caffaro
BibFormat, redesign and rewrite in Python. BibConvert
XML-oriented mode. OAI harvester improvements. Improvements to
BibEdit. WebDoc tool. WebJournal refactoring and rewrite.
WebComment rounds and threads. WebSubmit asynchronous upload
support. Improvements to numerous modules.
- Samuele Kaplun
Authentication and user preferences rewrite and improvements.
Firewall-like access control RBAC system. Fulltext file
management rewrite and upload feature. Intbitset Python C
extension for the indexer. Improvements to the task scheduler and
session handler. Improvements to numerous modules.
- Ludmila Marian
Citerank family of ranking methods. Fixes to numerous modules.
- - Chris Montarbaud
- Multimedia and photo management.
-
- Jaime Garcia Llopis
Improvements to the BibCirculation module.
- Flavio Costa
Contributions to the Italian translation.
- Jiri Kuncar
- Contributions to the Czech translation.
+ Fix for demo site installation. Contributions to the Czech
+ translation.
- Esteban J. G. Gabancho
Initial release of WebApiKey, enhancements for WebSubmit.
- Lars Holm Nielsen
Initial release of Invenio Upgrader and jsonutils; patches for
pluginutils,
- Patrick Glauner
Cleanup of SQL queries for several modules.
- Raquel Jimenez Encinar
Errorlib refactoring, improvements to search UI, discussion tab,
merged record redirect, adaptation to new web test framework.
- - Vasanth Venkatraman
- Improvements to BibUpload version treatment, monotask and
- sequence tasks for BibSched.
+ - Grzegorz Szpura
+ Better browsing of fuzzy indexes.
- Thorsten Schwander
Improvements to dbdump.
- Jan Aage Lavik
Improvements to BibMatch with remote matching capabilities,
improvements to plot extractor, improvements to harvesting and
other small fixes.
- Piotr Praczyk
OAI harvesting from arXiv. Test harvesting interface, for
OAIHarvest. Record comparison library functions, for BibRecord.
Numerous improvements to BibEdit, e.g. holding pen, copy/paste,
undo/redo.
- Samuele Carli
Initial implementation of BibAuthorID module, with Henning
Weiler. Fixes for basket export facility and adding external
items to baskets.
- Alessio Deiana
- Improvements to data cacher and cite summary.
+ Fix for BibFormat element initialisation. Improvements to data
+ cacher and cite summary.
- - Daniel Stanculescu
- Improvements to Unicode treatment for textutils.
+ - Wojciech Ziolek
+ Fixes for OAI holding pen facility, Strftime improvements for
+ dateutils.
- Sebastian Witowski
Improvements to multi-record editor.
- Laura Rueda
Mechanize compatibility for Invenio Connector.
- Annette Holtkamp
Updates to `Howto MARC' guide.
- Jocelyne Jerdelet
Updates to `Howto MARC' guide.
The EPFL development team currently consists of:
- Gregory Favre
Rewrite of WebBasket. WebMessage. Improvements to WebComment.
Other contributions and improvements.
The SLAC development team currently consists of:
- Mike Sullivan
Improvements to author pages.
- Eduardo Benavidez
Improvements to BibCatalog.
The Harvard-Smithsonian Center for Astrophysics development team
currently consists of:
- Alberto Accomazzi
Team leader.
- Giovanni Di Milia
Recognition of /record/sysno URLs, ADS formatting.
- Jay Luker
Improvements to the emergency notification sending facility.
- Roman Chyla
WSGI handler accepts UTF-8 strings.
-The Cornell development team currently consists of:
+Many former team members (mostly CERN staff and fellows, technical
+students, diploma students, summer students) contributed to the
+project since 2002. In an approximately decreasing chronological
+order:
- - Simeon Warner
- Team leader.
+ - Daniel Stanculescu
+ Improvements to Unicode treatment for textutils.
+
+ - Vasanth Venkatraman
+ Improvements to BibUpload version treatment, monotask and
+ sequence tasks for BibSched.
- Peter Halliday
Configurable /record URL name space, improvements to dbquery.
-Many former team members (mostly CERN staff and fellows, technical
-students, diploma students, summer students) contributed to the
-project since 2002. In an approximately decreasing chronological
-order:
+ - Chris Montarbaud
+ Multimedia and photo management.
- Joe Blaylock
Rewrite of SPIRES query syntax parser, support for nested
parenthesis for WebSearch, fuzzy author name tokenizer,
enrichment of author pages with h-index.
- Benoit Thiell
Fixes for BibRecord library, detailed record links, improvements
to code kwalitee in numerous modules. Improvements to
BibClassify.
- Nikola Yolov
Improvements and refactoring of BibAuthorID, fixes for
WebAuthorProfile.
- Lewis Barnes
Amendments for INSPIRE linking style.
- Olivier Canévet
Fixes for WebComment report abuse facility.
- Belinda Chan
User documentation for personal features like alerts and baskets.
- Carmen Alvarez Perez
Improvements to WebStat.
- Henning Weiler
Initial implementation of BibAuthorID module, with Samuele Carli.
- Juan Francisco Pereira Corral
Fix taxonomy regression test, for BibKnowledge.
- Stamen Todorov Peev
Enrichment of Dublin Core XSL stylesheet.
- Jan Iwaszkiewicz
Full-text snippet feature for full-text search.
- Björn Oltmanns
Initial release of BibEncode, multi-node support for BibSched,
style refactoring for WebComment.
- Christopher Dickinson
Patch for auto-suggest facility.
- Christopher Hayward
Improvements to the reference extraction tool.
- Travis Brooks
Support for SPIRES search syntax and other improvements.
- Juliusz Sompolski
Reimplementation of pdf2hocr2pdf.
- Jurga Girdzijauskaite
Contributions to the Lithuanian translation.
- Tony Ohls
Fixes for regexp treatment in BibConvert.
- Marko Niinimaki
Contributions to the BibRank citation module and WebSearch
summary output formats. Initial implementation of BibCatalog and
BibKnowledge.
- Mathieu Barras
Initial implementation of SWORD client application.
- Fabio Souto
Initial implementation of the invenio config dumper/loader.
- Pablo Vázquez Caderno
Prevent loop in collection trees, for WebSearch.
- Victor Engmark
Bash completion for inveniocfg, patches for dist-packages.
- Javier Martin
Moderation tools for WebComment, improvements to BibEdit, initial
implementation of the batch uploader.
- Nikolaos Kasioumis
Hosted collections for WebSearch, rewrite of WebBasket UI,
improvements to WebAlert.
- Valkyrie Savage
Initial implementation of the plot extractor library.
- Miguel Martinez Pedreira
Tool for manipulating embedded metadata in full-text files.
- Jorge Aranda Sumarroca
Support for FCKeditor-uploaded files for WebSubmit.
- Glenn Gard
Implemented many unit, regression and web tests for WebAlert,
WebJournal, WebSubmit, WebComment, WebMessage, WebSession
modules.
- Christopher Parker
Improvements to the submission approval workflow.
- Martin Vesely
OAIHarvest, OAIRepository, OAI daemon and admin
interface. BibConvert text-oriented mode. BibMatch.
- Tony Osborne
Improvements to the reference extractor.
- Radoslav Ivanov
Contributions to the WebBasket module test suite. Support for
parentheses and SPIRES search syntax in WebSearch. Initial
implementation of the multi-record editor. Initial
implementation of BibExport.
- Joaquim Rodrigues Silvestre
Initial implementation of the BibCirculation module to handle
physical item copies.
- Kyriakos Liakopoulos
Initial implementation of BibMerge. Improvements to BibEdit.
- Lars Christian Raae
Record locking, per-collection curating authentication, reverting
older record versions, for the BibEdit. Rewrite of BibEdit in
Ajax.
- Ruben Pollan
Contributions to the WebStat module.
- Nicholas Robinson
WebSubmit. Reference extraction for the BibEdit module.
- Gabriel Hase
WebJournal module.
- Diane Berkovits
Ranking by downloads, for the BibRank and WebSubmit
modules. Group management for WebSession.
- Joël Vogt
Contributions to the BibClassify module.
- Marcus Johansson
Contributions to the WebStat module.
- Jan Brice Krause
Original implementation of the fulltext file transfer mode for
BibUpload.
- Axel Voitier
Complex approval and refereeing subsystem, for WebSubmit.
- Alberto Pepe
BibClassify, OAIHarvest Admin.
- Øyvind Østlund
Sample BibTeX to MARCXML conversion, for BibConvert.
- Nikolay Dyankov
XML-based BFX formatting engine, for BibFormat.
- Olivier Serres
External collections searching, for WebSearch.
- Eric Stahl
Rewrite of BibUpload in Python.
- Frederic Gobry
Contributions to the templating system, the URL handler, the
gettext infrastructure, the regression test suite infrastructure,
numerous patches for many modules.
- Krzysztof Jedrzejek
Improvements to ElmSubmit.
- Yohann Paris
BibEdit Admin.
- Paulo Cabral
WebComment, error library, design of collaborative features.
- Thomas Baron
WebSubmit and BibUpload. Improvements to BibSched.
- Maja Gracco
System librarian, MARC21 expertise.
- Tiberiu Dondera
Patches for the WebSubmit engine and the admin interface.
Templatizing codebase.
- Anna Afshar
Ranking by citations, for the BibRank module.
- Trond Aksel Myklebust
Ranking engine, the BibRank module. Stemming and stopwords for
the BibIndex module. Site access policies and external
authentication methods, for the WebAccess module and its clients.
Administration interfaces to WebSearch, BibIndex, BibRank, and
additions to WebAccess.
- Hector Sanchez
Metadata output formatter, the BibFormat module. Session
management, for the WebSession module.
- Richard Owen
Electronic mail submission system, the ElmSubmit module.
- Alexandra Silva
Rewriting and enhancing BibRecord XML MARC and record handling
library, for the BibEdit module.
- Arturo Montejo Raez
Automatic text classification and keyword indexing. (upcoming)
- Mikael Vik
Role-based access control engine and its admin interface,
the WebAccess module. Guest user sessions garbage collector,
for the WebSession module.
- Erik Simon , Eric Simon
Alert engine, for the WebAlert module.
- Roberta Faggian
Rewrite of the alert and basket user interfaces, for the WebAlert
and the WebBasket modules.
- Julio Pernia Aznar
Parts of user and session management, for the WebSession module.
- Franck Grenier
Parts of web design and graphics, for the WebStyle module.
- Eduardo Margallo
Enhancements to the indexing engine, for the BibWords module.
Initial implementation of the task scheduler, for the BibSched
module.
- end of file -
diff --git a/INSTALL b/INSTALL
index 61ce5933d..3bc0c54db 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,909 +1,909 @@
Invenio INSTALLATION
====================
About
=====
This document specifies how to build, customize, and install Invenio
-v1.1.1 for the first time. See RELEASE-NOTES if you are upgrading
+v1.1.2 for the first time. See RELEASE-NOTES if you are upgrading
from a previous Invenio release.
Contents
========
0. Prerequisites
1. Quick instructions for the impatient Invenio admin
2. Detailed instructions for the patient Invenio admin
0. Prerequisites
================
Here is the software you need to have around before you
start installing Invenio:
a) Unix-like operating system. The main development and
production platforms for Invenio at CERN are GNU/Linux
distributions Debian, Gentoo, Scientific Linux (aka RHEL),
Ubuntu, but we also develop on Mac OS X. Basically any Unix
system supporting the software listed below should do.
If you are using Debian GNU/Linux ``Lenny'' or later, then you
can install most of the below-mentioned prerequisites and
recommendations by running:
$ sudo aptitude install python-dev apache2-mpm-prefork \
mysql-server mysql-client python-mysqldb \
python-4suite-xml python-simplejson python-xml \
python-libxml2 python-libxslt1 gnuplot poppler-utils \
gs-common clisp gettext libapache2-mod-wsgi unzip \
pdftk html2text giflib-tools \
pstotext netpbm python-chardet
You also need to install following packages from PyPi
by running:
$ sudo pip install -r requirements.txt
$ sudo pip install -r requirements-extras.txt
$ sudo pip install -r requirements-flask.txt
$ sudo pip install -r requirements-flask-ext.txt
You may also want to install some of the following packages,
if you have them available on your concrete architecture:
$ sudo aptitude install sbcl cmucl pylint pychecker pyflakes \
python-profiler python-epydoc libapache2-mod-xsendfile \
openoffice.org python-utidylib python-beautifulsoup
Moreover, you should install some Message Transfer Agent (MTA)
such as Postfix so that Invenio can email notification
alerts or registration information to the end users, contact
moderators and reviewers of submitted documents, inform
administrators about various runtime system information, etc:
$ sudo aptitude install postfix
After running the above-quoted aptitude command(s), you can
proceed to configuring your MySQL server instance
(max_allowed_packet in my.cnf, see item 0b below) and then to
installing the Invenio software package in the section 1
below.
If you are using another operating system, then please
continue reading the rest of this prerequisites section, and
please consult our wiki pages for any concrete hints for your
specific operating system.
b) MySQL server (may be on a remote machine), and MySQL client
(must be available locally too). MySQL versions 4.1 or 5.0
are supported. Please set the variable "max_allowed_packet"
in your "my.cnf" init file to at least 4M. (For sites such as
INSPIRE, having 1M records with 10M citer-citee pairs in its
citation map, you may need to increase max_allowed_packet to
1G.) You may perhaps also want to run your MySQL server
natively in UTF-8 mode by setting "default-character-set=utf8"
in various parts of your "my.cnf" file, such as in the
"[mysql]" part and elsewhere; but this is not really required.
c) Apache 2 server, with support for loading DSO modules, and
optionally with SSL support for HTTPS-secure user
authentication, and mod_xsendfile for off-loading file
downloads away from Invenio processes to Apache.
d) Python v2.4 or above:
as well as the following Python modules:
- (mandatory) MySQLdb (version >= 1.2.1_p2; see below)
- (mandatory) Pyparsing, for document parsing
- (recommended) python-dateutil, for complex date processing:
- (recommended) PyXML, for XML processing:
- (recommended) PyRXP, for very fast XML MARC processing:
- (recommended) lxml, for XML/XLST processing:
- (recommended) libxml2-python, for XML/XLST processing:
- (recommended) simplejson, for AJAX apps:
Note that if you are using Python-2.6, you don't need to
install simplejson, because the module is already included
in the main Python distribution.
- (recommended) Gnuplot.Py, for producing graphs:
- (recommended) Snowball Stemmer, for stemming:
- (recommended) py-editdist, for record merging:
- (recommended) numpy, for citerank methods:
- (recommended) magic, for full-text file handling:
- (optional) chardet, for character encoding detection:
- (optional) 4suite, slower alternative to PyRXP and
libxml2-python:
- (optional) feedparser, for web journal creation:
- (optional) RDFLib, to use RDF ontologies and thesauri:
- (optional) mechanize, to run regression web test suite:
- (optional) python-mock, mocking library for the test suite:
- (optional) hashlib, needed only for Python-2.4 and only
if you would like to use AWS connectivity:
- (optional) utidylib, for HTML washing:
- (optional) Beautiful Soup, for HTML washing:
- (optional) Python Twitter (and its dependencies) if you want
to use the Twitter Fetcher bibtasklet:
- (optional) Python OpenID if you want to enable OpenID support
for authentication:
- (optional) Python Rauth if you want to enable OAuth 1.0/2.0
support for authentication (depends on Python-2.6 or later):
Note: MySQLdb version 1.2.1_p2 or higher is recommended. If
you are using an older version of MySQLdb, you may get
into problems with character encoding.
e) mod_wsgi Apache module. Versions 3.x and above are
recommended.
Note: if you are using Python 2.4 or earlier, then you should
also install the wsgiref Python module, available from:
(As of Python 2.5
this module is included in standard Python
distribution.)
f) If you want to be able to extract references from PDF fulltext
files, then you need to install pdftotext version 3 at least.
g) If you want to be able to search for words in the fulltext
files (i.e. to have fulltext indexing) or to stamp submitted
files, then you need as well to install some of the following
tools:
- for Microsoft Office/OpenOffice.org document conversion:
OpenOffice.org
- for PDF file stamping: pdftk, pdf2ps
- for PDF files: pdftotext or pstotext
- for PostScript files: pstotext or ps2ascii
- for DjVu creation, elaboration: DjVuLibre
- to perform OCR: OCRopus (tested only with release 0.3.1)
- to perform different image elaborations: ImageMagick
- to generate PDF after OCR: netpbm, ReportLab and pyPdf or pyPdf2
h) If you have chosen to install fast XML MARC Python processors
in the step d) above, then you have to install the parsers
themselves:
- (optional) 4suite:
i) (recommended) Gnuplot, the command-line driven interactive
plotting program. It is used to display download and citation
history graphs on the Detailed record pages on the web
interface. Note that Gnuplot must be compiled with PNG output
support, that is, with the GD library. Note also that Gnuplot
is not required, only recommended.
j) (recommended) A Common Lisp implementation, such as CLISP,
SBCL or CMUCL. It is used for the web server log analysing
tool and the metadata checking program. Note that any of the
three implementations CLISP, SBCL, or CMUCL will do. CMUCL
produces fastest machine code, but it does not support UTF-8
yet. Pick up CLISP if you don't know what to do. Note that a
Common Lisp implementation is not required, only recommended.
k) GNU gettext, a set of tools that makes it possible to
translate the application in multiple languages.
This is available by default on many systems.
l) (recommended) xlwt 0.7.2, Library to create spreadsheet files
compatible with MS Excel 97/2000/XP/2003 XLS files, on any
platform, with Python 2.3 to 2.6
m) (recommended) matplotlib 1.0.0 is a python 2D plotting library
which produces publication quality figures in a variety of
hardcopy formats and interactive environments across
platforms. matplotlib can be used in python scripts, the
python and ipython shell (ala MATLAB® or Mathematica®),
web application servers, and six graphical user interface
toolkits. It is used to generate pie graphs in the custom
summary query (WebStat)
n) (optional) FFmpeg, an open-source tools and libraries collection
to convert video and audio files. It makes use of both internal
as well as external libraries to generate videos for the web, such
as Theora, WebM and H.264 out of almost any thinkable video input.
FFmpeg is needed to run video related modules and submission workflows
in Invenio. The minimal configuration of ffmpeg for the Invenio demo site
requires a number of external libraries. It is highly recommended
to remove all installed versions and packages that are coming with
various Linux distributions and install the latest versions from
sources. Additionally, you will need the Mediainfo Library for multimedia
metadata handling.
Minimum libraries for the demo site:
- the ffmpeg multimedia encoder tools
- a library for jpeg images needed for thumbnail extraction
- a library for the ogg container format, needed for Vorbis and Theora
- the OGG Vorbis audio codec library
- the OGG Theora video codec library
- the WebM video codec library
- the mediainfo library for multimedia metadata
Recommended for H.264 video (!be aware of licensing issues!):
- a library for H.264 video encoding
- a library for Advanced Audio Coding
- a library for MP3 encoding
o) (recommended) RabbitMQ is a message broker used by Celery for running
a distributed task queue.
- Install
sudo aptitude install rabbitmq-server
- Enable web interface
sudo rabbitmq-plugins enable rabbitmq_management
- Add user and vhost
sudo rabbitmqctl add_user myuser mypassword
sudo rabbitmqctl add_vhost myvhost
sudo rabbitmqctl set_permissions -p myvhost myuser ".*" ".*" ".*"
- Allow Web UI login
sudo rabbitmqctl set_user_tags myuser management
- Change default user password
sudo rabbitmqctl change_password guest guest
sudo service rabbitmq-server restart
- Starting Celery worker (after Invenio is installed):
celery worker -A invenio -l info -B -E
- Starting Flower (monitoring web interface, requires Python 2.6):
pip install flower
flower --port=5555
http://localhost:55672 (RabbitMQ web admin)
http://localhost:5555 (Flower UI)
1. Quick instructions for the impatient Invenio admin
=========================================================
1a. Installation
----------------
$ cd $HOME/src/
- $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz
- $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.md5
- $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.sig
- $ md5sum -c invenio-1.1.1.tar.gz.md5
- $ gpg --verify invenio-1.1.1.tar.gz.sig invenio-1.1.1.tar.gz
- $ tar xvfz invenio-1.1.1.tar.gz
- $ cd invenio-1.1.1
+ $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz
+ $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.md5
+ $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.sig
+ $ md5sum -c invenio-1.1.2.tar.gz.md5
+ $ gpg --verify invenio-1.1.2.tar.gz.sig invenio-1.1.2.tar.gz
+ $ tar xvfz invenio-1.1.2.tar.gz
+ $ cd invenio-1.1.2
$ ./configure
$ make
$ make install
$ make install-bootstrap
$ make install-hogan-plugin
$ make install-mathjax-plugin ## optional
$ make install-jquery-plugins ## optional
$ make install-jquery-tokeninput ## optional
$ make install-plupload-plugin ## optional
$ make install-ckeditor-plugin ## optional
$ make install-pdfa-helper-files ## optional
$ make install-mediaelement ## optional
$ make install-solrutils ## optional
$ make install-js-test-driver ## optional
1b. Configuration
-----------------
$ sudo chown -R www-data.www-data /opt/invenio
$ sudo -u www-data emacs /opt/invenio/etc/invenio-local.conf
$ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-secret-key
$ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-tables
$ sudo -u www-data /opt/invenio/bin/inveniocfg --load-bibfield-conf
$ sudo -u www-data /opt/invenio/bin/inveniocfg --load-webstat-conf
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-apache-conf
$ sudo /etc/init.d/apache2 restart
$ sudo -u www-data /opt/invenio/bin/inveniocfg --check-openoffice
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-demo-site
$ sudo -u www-data /opt/invenio/bin/inveniocfg --load-demo-records
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-unit-tests
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-regression-tests
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-web-tests
$ sudo -u www-data /opt/invenio/bin/inveniocfg --remove-demo-records
$ sudo -u www-data /opt/invenio/bin/inveniocfg --drop-demo-site
$ firefox http://your.site.com/help/admin/howto-run
2. Detailed instructions for the patient Invenio admin
==========================================================
2a. Installation
----------------
The Invenio uses standard GNU autoconf method to build and
install its files. This means that you proceed as follows:
$ cd $HOME/src/
Change to a directory where we will build the Invenio
sources. (The built files will be installed into different
"target" directories later.)
- $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz
- $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.md5
- $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.sig
+ $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz
+ $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.md5
+ $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.sig
Fetch Invenio source tarball from the distribution server,
together with MD5 checksum and GnuPG cryptographic signature
files useful for verifying the integrity of the tarball.
- $ md5sum -c invenio-1.1.1.tar.gz.md5
+ $ md5sum -c invenio-1.1.2.tar.gz.md5
Verify MD5 checksum.
- $ gpg --verify invenio-1.1.1.tar.gz.sig invenio-1.1.1.tar.gz
+ $ gpg --verify invenio-1.1.2.tar.gz.sig invenio-1.1.2.tar.gz
Verify GnuPG cryptographic signature. Note that you may
first have to import my public key into your keyring, if you
haven't done that already:
$ gpg --keyserver wwwkeys.eu.pgp.net --recv-keys 0xBA5A2B67
The output of the gpg --verify command should then read:
Good signature from "Tibor Simko "
You can safely ignore any trusted signature certification
warning that may follow after the signature has been
successfully verified.
- $ tar xvfz invenio-1.1.1.tar.gz
+ $ tar xvfz invenio-1.1.2.tar.gz
Untar the distribution tarball.
- $ cd invenio-1.1.1
+ $ cd invenio-1.1.2
Go to the source directory.
$ ./configure
Configure Invenio software for building on this specific
platform. You can use the following optional parameters:
--prefix=/opt/invenio
Optionally, specify the Invenio general
installation directory (default is /opt/invenio).
It will contain command-line binaries and program
libraries containing the core Invenio
functionality, but also store web pages, runtime log
and cache information, document data files, etc.
Several subdirs like `bin', `etc', `lib', or `var'
will be created inside the prefix directory to this
effect. Note that the prefix directory should be
chosen outside of the Apache htdocs tree, since only
one of its subdirectories (prefix/var/www) is to be
accessible directly via the Web (see below).
Note that Invenio won't install to any other
directory but to the prefix mentioned in this
configuration line.
--with-python=/opt/python/bin/python2.4
Optionally, specify a path to some specific Python
binary. This is useful if you have more than one
Python installation on your system. If you don't set
this option, then the first Python that will be found
in your PATH will be chosen for running Invenio.
--with-mysql=/opt/mysql/bin/mysql
Optionally, specify a path to some specific MySQL
client binary. This is useful if you have more than
one MySQL installation on your system. If you don't
set this option, then the first MySQL client
executable that will be found in your PATH will be
chosen for running Invenio.
--with-clisp=/opt/clisp/bin/clisp
Optionally, specify a path to CLISP executable. This
is useful if you have more than one CLISP
installation on your system. If you don't set this
option, then the first executable that will be found
in your PATH will be chosen for running Invenio.
--with-cmucl=/opt/cmucl/bin/lisp
Optionally, specify a path to CMUCL executable. This
is useful if you have more than one CMUCL
installation on your system. If you don't set this
option, then the first executable that will be found
in your PATH will be chosen for running Invenio.
--with-sbcl=/opt/sbcl/bin/sbcl
Optionally, specify a path to SBCL executable. This
is useful if you have more than one SBCL
installation on your system. If you don't set this
option, then the first executable that will be found
in your PATH will be chosen for running Invenio.
--with-openoffice-python
Optionally, specify the path to the Python interpreter
embedded with OpenOffice.org. This is normally not
contained in the normal path. If you don't specify this
it won't be possible to use OpenOffice.org to convert from and
to Microsoft Office and OpenOffice.org documents.
This configuration step is mandatory. Usually, you do this
step only once.
(Note that if you are building Invenio not from a
released tarball, but from the Git sources, then you have to
generate the configure file via autotools:
$ sudo aptitude install automake1.9 autoconf
$ aclocal-1.9
$ automake-1.9 -a
$ autoconf
after which you proceed with the usual configure command.)
$ make
Launch the Invenio build. Since many messages are printed
during the build process, you may want to run it in a
fast-scrolling terminal such as rxvt or in a detached screen
session.
During this step all the pages and scripts will be
pre-created and customized based on the config you have
edited in the previous step.
Note that on systems such as FreeBSD or Mac OS X you have to
use GNU make ("gmake") instead of "make".
$ make install
Install the web pages, scripts, utilities and everything
needed for Invenio runtime into respective installation
directories, as specified earlier by the configure command.
Note that if you are installing Invenio for the first
time, you will be asked to create symbolic link(s) from
Python's site-packages system-wide directory(ies) to the
installation location. This is in order to instruct Python
where to find Invenio's Python files. You will be
hinted as to the exact command to use based on the
parameters you have used in the configure command.
$ make install-bootstrap
This will automatically download and install Twitter
Bootstrap prerequisite.
$ make install-mathjax-plugin ## optional
This will automatically download and install in the proper
place MathJax, a JavaScript library to render LaTeX formulas
in the client browser.
Note that in order to enable the rendering you will have to
set the variable CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS in
invenio-local.conf to a suitable list of output format
codes. For example:
CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS = hd,hb
$ make install-jquery-plugins ## optional
This will automatically download and install in the proper
place jQuery and related plugins. They are used for AJAX
applications such as the record editor.
Note that `unzip' is needed when installing jquery plugins.
$ make install-jquery-tokeninput ## optional
This will automatically download and install jQuery
Tokeninput pre-requisite.
$ make install-plupload-plugin ## optional
This will automatically download and install plupload
pre-requisite that is used in the deposition interface for
submitting files.
$ make install-ckeditor-plugin ## optional
This will automatically download and install in the proper
place CKeditor, a WYSIWYG Javascript-based editor (e.g. for
the WebComment module).
Note that in order to enable the editor you have to set the
CFG_WEBCOMMENT_USE_RICH_EDITOR to True.
$ make install-pdfa-helper-files ## optional
This will automatically download and install in the proper
place the helper files needed to create PDF/A files out of
existing PDF files.
$ make install-mediaelement ## optional
This will automatically download and install the MediaElementJS
HTML5 video player that is needed for videos on the DEMO site.
$ make install-solrutils ## optional
This will automatically download and install a Solr instance
which can be used for full-text searching. See CFG_SOLR_URL
variable in the invenio.conf. Note that the admin later has
to take care of running init.d scripts which would start the
Solr instance automatically.
$ make install-js-test-driver ## optional
This will automatically download and install JsTestDriver
which is needed to run JS unit tests. Recommended for developers.
2b. Configuration
-----------------
Once the basic software installation is done, we proceed to
configuring your Invenio system.
$ sudo chown -R www-data.www-data /opt/invenio
For the sake of simplicity, let us assume that your Invenio
installation will run under the `www-data' user process
identity. The above command changes ownership of installed
files to www-data, so that we shall run everything under
this user identity from now on.
For production purposes, you would typically enable Apache
server to read all files from the installation place but to
write only to the `var' subdirectory of your installation
place. You could achieve this by configuring Unix directory
group permissions, for example.
$ sudo -u www-data emacs /opt/invenio/etc/invenio-local.conf
Customize your Invenio installation. Please read the
'invenio.conf' file located in the same directory that
contains the vanilla default configuration parameters of
your Invenio installation. If you want to customize some of
these parameters, you should create a file named
'invenio-local.conf' in the same directory where
'invenio.conf' lives and you should write there only the
customizations that you want to be different from the
vanilla defaults.
Here is a realistic, minimalist, yet production-ready
example of what you would typically put there:
$ cat /opt/invenio/etc/invenio-local.conf
[Invenio]
CFG_SITE_NAME = John Doe's Document Server
CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe
CFG_SITE_URL = http://your.site.com
CFG_SITE_SECURE_URL = https://your.site.com
CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com
CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com
CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com
CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com
CFG_DATABASE_HOST = localhost
CFG_DATABASE_NAME = invenio
CFG_DATABASE_USER = invenio
CFG_DATABASE_PASS = my123p$ss
CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE = 1
You should override at least the parameters mentioned above
in order to define some very essential runtime parameters
such as the name of your document server (CFG_SITE_NAME and
CFG_SITE_NAME_INTL_*), the visible URL of your document
server (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email
address of the local Invenio administrator, comment
moderator, and alert engine (CFG_SITE_SUPPORT_EMAIL,
CFG_SITE_ADMIN_EMAIL, etc), and last but not least your
database credentials (CFG_DATABASE_*).
If this is a first installation of Invenio it is recommended
you set the CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE
variable to 1. If this is instead an upgrade from an existing
installation don't add it until you have run:
$ bibdocfile --fix-bibdocfsinfo-cache .
The Invenio system will then read both the default
invenio.conf file and your customized invenio-local.conf
file and it will override any default options with the ones
you have specified in your local file. This cascading of
configuration parameters will ease your future upgrades.
If you want to have multiple Invenio instances for distributed
video encoding, you need to share the same configuration among
them and make some of the folders of the Invenio installation
available for all nodes.
Configure the allowed tasks for every node:
CFG_BIBSCHED_NODE_TASKS = {
"hostname_machine1" : ["bibindex", "bibupload",
"bibreformat","webcoll", "bibtaskex", "bibrank",
"oaiharvest", "oairepositoryupdater", "inveniogc",
"webstatadmin", "bibclassify", "bibexport",
"dbdump", "batchuploader", "bibauthorid", "bibtasklet"],
"hostname_machine2" : ['bibencode',]
}
Share the following directories among Invenio instances:
/var/tmp-shared
hosts video uploads in a temporary form
/var/tmp-shared/bibencode/jobs
hosts new job files for the video encoding daemon
/var/tmp-shared/bibencode/jobs/done
hosts job files that have been processed by the daemon
/var/data/files
hosts fulltext and media files associated to records
/var/data/submit
hosts files created during submissions
$ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all
Make the rest of the Invenio system aware of your
invenio-local.conf changes. This step is mandatory each
time you edit your conf files.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-secret-key
You may need to create secret key for the Flask application
if you have not done so yet during customisation of your
`invenio-local.conf'. This command will check the contents
of this file and will update it with randomly generated
secret key value.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all
Make the rest of the Invenio system aware of the secret key
change in invenio-local.conf.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-tables
If you are installing Invenio for the first time, you
have to create database tables.
Note that this step checks for potential problems such as
the database connection rights and may ask you to perform
some more administrative steps in case it detects a problem.
Notably, it may ask you to set up database access
permissions, based on your configure values.
If you are installing Invenio for the first time, you
have to create a dedicated database on your MySQL server
that Invenio can use for its purposes. Please
contact your MySQL administrator and ask them to execute the
commands this step proposes to you.
At this point you should now have successfully completed the
"make install" process. We continue by setting up the
Apache web server.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --load-bibfield-conf
Load the configuration file of the BibField module. It will
create `bibfield_config.py' file. (FIXME: When BibField
becomes essential part of Invenio, this step should be later
automatised so that people do not have to run it manually.)
$ sudo -u www-data /opt/invenio/bin/inveniocfg --load-webstat-conf
Load the configuration file of webstat module. It will create
the tables in the database for register customevents, such as
basket hits.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-apache-conf
Running this command will generate Apache virtual host
configurations matching your installation. You will be
instructed to check created files (usually they are located
under /opt/invenio/etc/apache/) and edit your httpd.conf
to activate Invenio virtual hosts.
If you are using Debian GNU/Linux ``Lenny'' or later, then
you can do the following to create your SSL certificate and
to activate your Invenio vhosts:
## make SSL certificate:
$ sudo aptitude install ssl-cert
$ sudo mkdir /etc/apache2/ssl
$ sudo /usr/sbin/make-ssl-cert /usr/share/ssl-cert/ssleay.cnf \
/etc/apache2/ssl/apache.pem
## add Invenio web sites:
$ sudo ln -s /opt/invenio/etc/apache/invenio-apache-vhost.conf \
/etc/apache2/sites-available/invenio
$ sudo ln -s /opt/invenio/etc/apache/invenio-apache-vhost-ssl.conf \
/etc/apache2/sites-available/invenio-ssl
## disable Debian's default web site:
$ sudo /usr/sbin/a2dissite default
## enable Invenio web sites:
$ sudo /usr/sbin/a2ensite invenio
$ sudo /usr/sbin/a2ensite invenio-ssl
## enable SSL module:
$ sudo /usr/sbin/a2enmod ssl
## if you are using xsendfile module, enable it too:
$ sudo /usr/sbin/a2enmod xsendfile
If you are using another operating system, you should do the
equivalent, for example edit your system-wide httpd.conf and
put the following include statements:
Include /opt/invenio/etc/apache/invenio-apache-vhost.conf
Include /opt/invenio/etc/apache/invenio-apache-vhost-ssl.conf
Note that you may need to adapt generated vhost file
snippets to match your concrete operating system specifics.
For example, the generated configuration snippet will
preload Invenio WSGI daemon application upon Apache start up
for faster site response. The generated configuration
assumes that you are using mod_wsgi version 3 or later. If
you are using the old legacy mod_wsgi version 2, then you
would need to comment out the WSGIImportScript directive
from the generated snippet, or else move the WSGI daemon
setup to the top level, outside of the VirtualHost section.
Note also that you may want to tweak the generated Apache
vhost snippet for performance reasons, especially with
respect to WSGIDaemonProcess parameters. For example, you
can increase the number of processes from the default value
`processes=5' if you have lots of RAM and if many concurrent
users may access your site in parallel. However, note that
you must use `threads=1' there, because Invenio WSGI daemon
processes are not fully thread safe yet. This may change in
the future.
$ sudo /etc/init.d/apache2 restart
Please ask your webserver administrator to restart the
Apache server after the above "httpd.conf" changes.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --check-openoffice
If you plan to support MS Office or Open Document Format
files in your installation, you should check whether
LibreOffice or OpenOffice.org is well integrated with
Invenio by running the above command. You may be asked to
create a temporary directory for converting office files
with special ownership (typically as user nobody) and
permissions. Note that you can do this step later.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --create-demo-site
This step is recommended to test your local Invenio
installation. It should give you our "Atlantis Institute of
Science" demo installation, exactly as you see it at
<http://invenio-demo.cern.ch>.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --load-demo-records
Optionally, load some demo records to be able to test
indexing and searching of your local Invenio demo
installation.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-unit-tests
Optionally, you can run the unit test suite to verify the
unit behaviour of your local Invenio installation. Note
that this command should be run only after you have
installed the whole system via `make install'.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-regression-tests
Optionally, you can run the full regression test suite to
verify the functional behaviour of your local Invenio
installation. Note that this command requires to have
created the demo site and loaded the demo records. Note
also that running the regression test suite may alter the
database content with junk data, so that rebuilding the
demo site is strongly recommended afterwards.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-web-tests
Optionally, you can run additional automated web tests
running in a real browser. This requires to have Firefox
with the Selenium IDE extension installed.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --remove-demo-records
Optionally, remove the demo records loaded in the previous
step, but keeping otherwise the demo collection, submission,
format, and other configurations that you may reuse and
modify for your own production purposes.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --drop-demo-site
Optionally, drop also all the demo configuration so that
you'll end up with a completely blank Invenio system.
However, you may want to find it more practical not to drop
the demo site configuration but to start customizing from
there.
$ firefox http://your.site.com/help/admin/howto-run
In order to start using your Invenio installation, you
can start indexing, formatting and other daemons as
indicated in the "HOWTO Run" guide on the above URL. You
can also use the Admin Area web interfaces to perform
further runtime configurations such as the definition of
data collections, document types, document formats, word
indexes, etc.
$ sudo ln -s /opt/invenio/etc/bash_completion.d/inveniocfg \
/etc/bash_completion.d/inveniocfg
Optionally, if you are using Bash shell completion, then
you may want to create the above symlink in order to
configure completion for the inveniocfg command.
Good luck, and thanks for choosing Invenio.
- Invenio Development Team
diff --git a/Makefile.am b/Makefile.am
index 47b6c4cc4..b57f6af54 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,580 +1,583 @@
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
confignicedir = $(sysconfdir)/build
confignice_SCRIPTS=config.nice
SUBDIRS = po config modules
EXTRA_DIST = UNINSTALL THANKS RELEASE-NOTES configure-tests.py config.nice.in \
config.rpath
# current MathJax version and packages
# See also modules/miscutil/lib/htmlutils.py (get_mathjax_header)
MJV = 2.1
MATHJAX = http://invenio-software.org/download/mathjax/MathJax-v$(MJV).zip
# current CKeditor version
CKV = 3.6.6
CKEDITOR = ckeditor_$(CKV).zip
# current MediaElement.js version
MEV = master
MEDIAELEMENT = http://github.com/johndyer/mediaelement/zipball/$(MEV)
#for solrutils
INVENIO_JAVA_PATH = org/invenio_software/solr
solrdirname = apache-solr-3.1.0
solrdir = $(prefix)/lib/$(solrdirname)
solrutils_dir=$(CURDIR)/modules/miscutil/lib/solrutils
CLASSPATH=.:${solrdir}/dist/solrj-lib/commons-io-1.4.jar:${solrdir}/dist/apache-solr-core-*jar:${solrdir}/contrib/jzlib-1.0.7.jar:${solrdir}/dist/apache-solr-solrj-3.1.0.jar:${solrdir}/dist/solrj-lib/slf4j-api-1.5.5.jar:${solrdir}/dist/*:${solrdir}/contrib/basic-lucene-libs/*:${solrdir}/contrib/analysis-extras/lucene-libs/*:${solrdir}/dist/solrj-lib/*
# git-version-get stuff:
BUILT_SOURCES = $(top_srcdir)/.version
$(top_srcdir)/.version:
echo $(VERSION) > $@-t && mv $@-t $@
dist-hook:
echo $(VERSION) > $(distdir)/.tarball-version
# Bootstrap version
BOOTSTRAPV = 2.2.1
# Hogan.js version
HOGANVER = 2.0.0
check-upgrade:
$(PYTHON) $(top_srcdir)/modules/miscutil/lib/inveniocfg_upgrader.py $(top_srcdir) --upgrade-check
check-custom-templates:
$(PYTHON) $(top_srcdir)/modules/webstyle/lib/template.py --check-custom-templates $(top_srcdir)
kwalitee-check:
@$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --stats $(top_srcdir)
kwalitee-check-errors-only:
@$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-errors $(top_srcdir)
kwalitee-check-variables:
@$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-variables $(top_srcdir)
kwalitee-check-indentation:
@$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-indentation $(top_srcdir)
kwalitee-check-sql-queries:
@$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-sql $(top_srcdir)
etags:
\rm -f $(top_srcdir)/TAGS
(cd $(top_srcdir) && find $(top_srcdir) -name "*.py" -print | xargs etags)
install-data-local:
for d in / /cache /cache/RTdata /log /tmp /tmp-shared /data /run /tmp-shared/bibencode/jobs/done /tmp-shared/bibedit-cache; do \
mkdir -p $(localstatedir)$$d ; \
done
@echo "************************************************************"
@echo "** Invenio software has been successfully installed! **"
@echo "** **"
@echo "** You may proceed to customizing your installation now. **"
@echo "************************************************************"
install-mathjax-plugin:
@echo "***********************************************************"
@echo "** Installing MathJax plugin, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/invenio-mathjax-plugin
mkdir /tmp/invenio-mathjax-plugin
rm -fr ${prefix}/var/www/MathJax
mkdir -p ${prefix}/var/www/MathJax
(cd /tmp/invenio-mathjax-plugin && \
wget '$(MATHJAX)' -O mathjax.zip && \
unzip -q mathjax.zip && cd mathjax-MathJax-* && cp -r * \
${prefix}/var/www/MathJax)
rm -fr /tmp/invenio-mathjax-plugin
@echo "************************************************************"
@echo "** The MathJax plugin was successfully installed. **"
@echo "** Please do not forget to properly set the option **"
@echo "** CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and **"
@echo "** CFG_WEBSUBMIT_USE_MATHJAX in invenio.conf. **"
@echo "************************************************************"
uninstall-mathjax-plugin:
@rm -rvf ${prefix}/var/www/MathJax
@echo "***********************************************************"
@echo "** The MathJax plugin was successfully uninstalled. **"
@echo "***********************************************************"
install-jscalendar-plugin:
@echo "***********************************************************"
@echo "** Installing jsCalendar plugin, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/invenio-jscalendar-plugin
mkdir /tmp/invenio-jscalendar-plugin
(cd /tmp/invenio-jscalendar-plugin && \
wget 'http://www.dynarch.com/static/jscalendar-1.0.zip' && \
unzip -u jscalendar-1.0.zip && \
mkdir -p ${prefix}/var/www/jsCalendar && \
cp jscalendar-1.0/img.gif ${prefix}/var/www/jsCalendar/jsCalendar.gif && \
cp jscalendar-1.0/calendar.js ${prefix}/var/www/jsCalendar/ && \
cp jscalendar-1.0/calendar-setup.js ${prefix}/var/www/jsCalendar/ && \
cp jscalendar-1.0/lang/calendar-en.js ${prefix}/var/www/jsCalendar/ && \
cp jscalendar-1.0/calendar-blue.css ${prefix}/var/www/jsCalendar/)
rm -fr /tmp/invenio-jscalendar-plugin
@echo "***********************************************************"
@echo "** The jsCalendar plugin was successfully installed. **"
@echo "***********************************************************"
uninstall-jscalendar-plugin:
@rm -rvf ${prefix}/var/www/jsCalendar
@echo "***********************************************************"
@echo "** The jsCalendar plugin was successfully uninstalled. **"
@echo "***********************************************************"
install-js-test-driver:
@echo "*******************************************************"
@echo "** Installing js-test-driver, please wait... **"
@echo "*******************************************************"
mkdir -p $(prefix)/lib/java/js-test-driver && \
cd $(prefix)/lib/java/js-test-driver && \
wget http://invenio-software.org/download/js-test-driver/JsTestDriver-1.3.5.jar -O JsTestDriver.jar
uninstall-js-test-driver:
@rm -rvf ${prefix}/lib/java/js-test-driver
@echo "*********************************************************"
@echo "** The js-test-driver was successfully uninstalled. **"
@echo "*********************************************************"
install-jquery-plugins:
@echo "***********************************************************"
@echo "** Installing various jQuery plugins, please wait... **"
@echo "***********************************************************"
mkdir -p ${prefix}/var/www/js
mkdir -p $(prefix)/var/www/css
(cd ${prefix}/var/www/js && \
wget http://code.jquery.com/jquery-1.7.1.min.js && \
mv jquery-1.7.1.min.js jquery.min.js && \
wget http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.17/jquery-ui.min.js && \
wget http://invenio-software.org/download/jquery/v1.5/js/jquery.jeditable.mini.js && \
wget https://raw.github.com/malsup/form/master/jquery.form.js --no-check-certificate && \
wget http://jquery-multifile-plugin.googlecode.com/svn/trunk/jquery.MultiFile.pack.js && \
wget -O jquery.tablesorter.zip http://invenio-software.org/download/jquery/jquery.tablesorter.20111208.zip && \
wget http://invenio-software.org/download/jquery/uploadify-v2.1.4.zip -O uploadify.zip && \
wget http://www.datatables.net/download/build/jquery.dataTables.min.js && \
wget http://invenio-software.org/download/jquery/jquery.bookmark.package-1.4.0.zip && \
unzip jquery.tablesorter.zip -d tablesorter && \
rm jquery.tablesorter.zip && \
rm -rf uploadify && \
unzip -u uploadify.zip -d uploadify && \
wget http://flot.googlecode.com/files/flot-0.6.zip && \
wget -O jquery-ui-timepicker-addon.js http://invenio-software.org/download/jquery/jquery-ui-timepicker-addon-1.0.3.js && \
unzip -u flot-0.6.zip && \
mv flot/jquery.flot.selection.min.js flot/jquery.flot.min.js flot/excanvas.min.js ./ && \
rm flot-0.6.zip && rm -r flot && \
mv uploadify/swfobject.js ./ && \
mv uploadify/cancel.png uploadify/uploadify.css uploadify/uploadify.allglyphs.swf uploadify/uploadify.fla uploadify/uploadify.swf ../img/ && \
mv uploadify/jquery.uploadify.v2.1.4.min.js ./jquery.uploadify.min.js && \
rm uploadify.zip && rm -r uploadify && \
wget --no-check-certificate https://github.com/douglascrockford/JSON-js/raw/master/json2.js && \
wget http://invenio-software.org/download/jquery/jquery.hotkeys-0.8.js -O jquery.hotkeys.js && \
wget http://jquery.bassistance.de/treeview/jquery.treeview.zip && \
unzip jquery.treeview.zip -d jquery-treeview && \
rm jquery.treeview.zip && \
wget http://invenio-software.org/download/jquery/v1.5/js/jquery.ajaxPager.js && \
unzip jquery.bookmark.package-1.4.0.zip && \
rm -f jquery.bookmark.ext.* bookmarks-big.png bookmarkBasic.html jquery.bookmark.js jquery.bookmark.pack.js && \
mv bookmarks.png ../img/ && \
mv jquery.bookmark.css ../css/ && \
rm -f jquery.bookmark.package-1.4.0.zip && \
mkdir -p ${prefix}/var/www/img && \
cd ${prefix}/var/www/img && \
wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/base/ && \
wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/smoothness/ && \
wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/ && \
wget --no-check-certificate -O datatables_jquery-ui.css https://github.com/DataTables/DataTables/raw/master/media/css/demo_table_jui.css && \
wget http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/jquery-ui.css && \
wget http://jquery-ui.googlecode.com/svn/tags/1.8.17/demos/images/calendar.gif && \
wget -r -np -nH --cut-dirs=5 -A "png" http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/images/)
@echo "***********************************************************"
@echo "** The jQuery plugins were successfully installed. **"
@echo "***********************************************************"
uninstall-jquery-plugins:
(cd ${prefix}/var/www/js && \
rm -f jquery.min.js && \
rm -f jquery.MultiFile.pack.js && \
rm -f jquery.jeditable.mini.js && \
rm -f jquery.flot.selection.min.js && \
rm -f jquery.flot.min.js && \
rm -f excanvas.min.js && \
rm -f jquery-ui-timepicker-addon.min.js && \
rm -f json2.js && \
rm -f jquery.uploadify.min.js && \
rm -rf tablesorter && \
rm -rf jquery-treeview && \
rm -f jquery.ajaxPager.js && \
rm -f jquery.form.js && \
rm -f jquery.dataTables.min.js && \
rm -f ui.core.js && \
rm -f jquery.bookmark.min.js && \
rm -f jquery.hotkeys.js && \
rm -f jquery.tablesorter.min.js && \
rm -f jquery-ui-1.7.3.custom.min.js && \
rm -f jquery.metadata.js && \
rm -f jquery-latest.js && \
rm -f jquery-ui.min.js)
(cd ${prefix}/var/www/img && \
rm -f cancel.png uploadify.css uploadify.swf uploadify.allglyphs.swf uploadify.fla && \
rm -f datatables_jquery-ui.css \
rm -f bookmarks.png) && \
(cd ${prefix}/var/www/css && \
rm -f jquery.bookmark.css)
@echo "***********************************************************"
@echo "** The jquery plugins were successfully uninstalled. **"
@echo "***********************************************************"
install-ckeditor-plugin:
@echo "***********************************************************"
@echo "** Installing CKeditor plugin, please wait... **"
@echo "***********************************************************"
rm -rf ${prefix}/lib/python/invenio/ckeditor/
rm -rf /tmp/invenio-ckeditor-plugin
mkdir /tmp/invenio-ckeditor-plugin
(cd /tmp/invenio-ckeditor-plugin && \
wget 'http://invenio-software.org/download/ckeditor/$(CKEDITOR)' && \
unzip -u -d ${prefix}/var/www $(CKEDITOR)) && \
find ${prefix}/var/www/ckeditor/ -depth -name '_*' -exec rm -rf {} \; && \
find ${prefix}/var/www/ckeditor/ckeditor* -maxdepth 0 ! -name "ckeditor.js" -exec rm -r {} \; && \
rm -fr /tmp/invenio-ckeditor-plugin
@echo "* Installing Invenio-specific CKeditor config..."
(cd $(top_srcdir)/modules/webstyle/etc && make install)
@echo "***********************************************************"
@echo "** The CKeditor plugin was successfully installed. **"
@echo "** Please do not forget to properly set the option **"
@echo "** CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR in invenio.conf. **"
@echo "***********************************************************"
uninstall-ckeditor-plugin:
@rm -rvf ${prefix}/var/www/ckeditor
@rm -rvf ${prefix}/lib/python/invenio/ckeditor
@echo "***********************************************************"
@echo "** The CKeditor plugin was successfully uninstalled. **"
@echo "***********************************************************"
install-pdfa-helper-files:
@echo "***********************************************************"
@echo "** Installing PDF/A helper files, please wait... **"
@echo "***********************************************************"
wget 'http://invenio-software.org/download/invenio-demo-site-files/ISOCoatedsb.icc' -O ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc
@echo "***********************************************************"
@echo "** The PDF/A helper files were successfully installed. **"
@echo "***********************************************************"
install-mediaelement:
@echo "***********************************************************"
@echo "** MediaElement.js, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/mediaelement
mkdir /tmp/mediaelement
wget 'http://github.com/johndyer/mediaelement/zipball/master' -O '/tmp/mediaelement/mediaelement.zip' --no-check-certificate
unzip -u -d '/tmp/mediaelement' '/tmp/mediaelement/mediaelement.zip'
rm -rf ${prefix}/var/www/mediaelement
mkdir ${prefix}/var/www/mediaelement
mv /tmp/mediaelement/johndyer-mediaelement-*/build/* ${prefix}/var/www/mediaelement
rm -rf /tmp/mediaelement
@echo "***********************************************************"
@echo "** MediaElement.js was successfully installed. **"
@echo "***********************************************************"
install-bootstrap:
@echo "***********************************************************"
@echo "** Installing Twitter Bootstrap, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/invenio-bootstrap
mkdir /tmp/invenio-bootstrap
(cd /tmp/invenio-bootstrap && \
wget -O bootstrap.zip 'http://invenio-software.org/download/bootstrap/bootstrap-${BOOTSTRAPV}.zip' && \
unzip -u bootstrap.zip && \
cp bootstrap/css/bootstrap-responsive.css ${prefix}/var/www/css/bootstrap-responsive.css && \
cp bootstrap/css/bootstrap-responsive.min.css ${prefix}/var/www/css/bootstrap-responsive.min.css && \
cp bootstrap/css/bootstrap.css ${prefix}/var/www/css/bootstrap.css && \
cp bootstrap/css/bootstrap.min.css ${prefix}/var/www/css/bootstrap.min.css && \
cp bootstrap/img/glyphicons-halflings-white.png ${prefix}/var/www/img/glyphicons-halflings-white.png && \
cp bootstrap/img/glyphicons-halflings.png ${prefix}/var/www/img/glyphicons-halflings.png && \
cp bootstrap/js/bootstrap.js ${prefix}/var/www/js/bootstrap.js && \
cp bootstrap/js/bootstrap.min.js ${prefix}/var/www/js/bootstrap.min.js && \
rm -fr /tmp/invenio-bootstrap )
@echo "***********************************************************"
@echo "** The Twitter Bootstrap was successfully installed. **"
@echo "***********************************************************"
uninstall-bootstrap:
rm ${prefix}/var/www/css/bootstrap-responsive.css && \
rm ${prefix}/var/www/css/bootstrap-responsive.min.css && \
rm ${prefix}/var/www/css/bootstrap.css && \
rm ${prefix}/var/www/css/bootstrap.min.css && \
rm ${prefix}/var/www/img/glyphicons-halflings-white.png && \
rm ${prefix}/var/www/img/glyphicons-halflings.png && \
rm ${prefix}/var/www/js/bootstrap.js && \
rm ${prefix}/var/www/js/bootstrap.min.js
@echo "***********************************************************"
@echo "** The Twitter Bootstrap was successfully uninstalled. **"
@echo "***********************************************************"
install-hogan-plugin:
@echo "***********************************************************"
@echo "** Installing Hogan.js, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/hogan
mkdir /tmp/hogan
(cd /tmp/hogan && \
wget -O hogan-${HOGANVER}.js 'http://twitter.github.com/hogan.js/builds/${HOGANVER}/hogan-${HOGANVER}.js' && \
cp hogan-${HOGANVER}.js ${prefix}/var/www/js/hogan.js && \
rm -fr /tmp/hogan )
@echo "***********************************************************"
@echo "** Hogan.js was successfully installed. **"
@echo "***********************************************************"
uninstall-hogan-plugin:
rm ${prefix}/var/www/js/hogan.js
@echo "***********************************************************"
@echo "** Hogan.js was successfully uninstalled. **"
@echo "***********************************************************"
install-jquery-tokeninput:
@echo "***********************************************************"
@echo "** Installing JQuery Tokeninput, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/jquery-tokeninput
mkdir /tmp/jquery-tokeninput
(cd /tmp/jquery-tokeninput && \
wget -O jquery-tokeninput-master.zip 'https://github.com/loopj/jquery-tokeninput/archive/master.zip' --no-check-certificate && \
unzip -u jquery-tokeninput-master.zip && \
cp jquery-tokeninput-master/styles/token-input-facebook.css ${prefix}/var/www/css/token-input-facebook.css && \
cp jquery-tokeninput-master/styles/token-input-mac.css ${prefix}/var/www/css/token-input-mac.css && \
cp jquery-tokeninput-master/styles/token-input.css ${prefix}/var/www/css/token-input.css && \
cp jquery-tokeninput-master/src/jquery.tokeninput.js ${prefix}/var/www/js/jquery.tokeninput.js && \
rm -fr /tmp/jquery-tokeninput )
@echo "***********************************************************"
@echo "** The JQuery Tokeninput was successfully installed. **"
@echo "***********************************************************"
uninstall-jquery-tokeninput:
rm ${prefix}/var/www/css/token-input-facebook.css && \
rm ${prefix}/var/www/css/token-input-mac.css && \
rm ${prefix}/var/www/css/token-input.css && \
rm ${prefix}/var/www/js/jquery.tokeninput.js
@echo "***********************************************************"
@echo "** The JQuery Tokeninput was successfully uninstalled. **"
@echo "***********************************************************"
install-plupload-plugin:
@echo "***********************************************************"
@echo "** Installing Plupload plugin, please wait... **"
@echo "***********************************************************"
rm -rf /tmp/plupload-plugin
mkdir /tmp/plupload-plugin
(cd /tmp/plupload-plugin && \
wget -O plupload-plugin.zip 'http://plupload.com/downloads/plupload_1_5_5.zip' && \
unzip -u plupload-plugin.zip && \
mkdir -p ${prefix}/var/www/js/plupload/i18n/ && \
cp -R plupload/js/jquery.plupload.queue ${prefix}/var/www/js/plupload/ && \
cp -R plupload/js/jquery.ui.plupload ${prefix}/var/www/js/plupload/ && \
cp plupload/js/plupload.browserplus.js ${prefix}/var/www/js/plupload/plupload.browserplus.js && \
cp plupload/js/plupload.flash.js ${prefix}/var/www/js/plupload/plupload.flash.js && \
cp plupload/js/plupload.flash.swf ${prefix}/var/www/js/plupload/plupload.flash.swf && \
cp plupload/js/plupload.full.js ${prefix}/var/www/js/plupload/plupload.full.js && \
cp plupload/js/plupload.gears.js ${prefix}/var/www/js/plupload/plupload.gears.js && \
cp plupload/js/plupload.html4.js ${prefix}/var/www/js/plupload/plupload.html4.js && \
cp plupload/js/plupload.html5.js ${prefix}/var/www/js/plupload/plupload.html5.js && \
cp plupload/js/plupload.js ${prefix}/var/www/js/plupload/plupload.js && \
cp plupload/js/plupload.silverlight.js ${prefix}/var/www/js/plupload/plupload.silverlight.js && \
cp plupload/js/plupload.silverlight.xap ${prefix}/var/www/js/plupload/plupload.silverlight.xap && \
cp plupload/js/i18n/*.js ${prefix}/var/www/js/plupload/i18n/ && \
rm -fr /tmp/plupload-plugin )
@echo "***********************************************************"
@echo "** The Plupload plugin was successfully installed. **"
@echo "***********************************************************"
uninstall-plupload-plugin:
rm -rf ${prefix}/var/www/js/plupload
@echo "***********************************************************"
@echo "** The Plupload was successfully uninstalled. **"
@echo "***********************************************************"
uninstall-pdfa-helper-files:
rm -f ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc
@echo "***********************************************************"
@echo "** The PDF/A helper files were successfully uninstalled. **"
@echo "***********************************************************"
#Solrutils allows automatic installation, running and searching of an external Solr index.
install-solrutils:
@echo "***********************************************************"
@echo "** Installing Solrutils and solr, please wait... **"
@echo "***********************************************************"
cd $(prefix)/lib && \
if test -d apache-solr*; then echo A solr directory already exists in `pwd` . \
Please remove it manually, if you are sure it is not needed; exit 2; fi ; \
if test -f apache-solr*; then echo solr tarball already exists in `pwd` . \
Please remove it manually.; exit 2; fi ; \
wget http://archive.apache.org/dist/lucene/solr/3.1.0/apache-solr-3.1.0.tgz && \
tar -xzf apache-solr-3.1.0.tgz && \
rm apache-solr-3.1.0.tgz
cd $(solrdir)/contrib/ ;\
wget http://mirrors.ibiblio.org/pub/mirrors/maven2/com/jcraft/jzlib/1.0.7/jzlib-1.0.7.jar && \
cd $(solrdir)/contrib/ ;\
jar -xf ../example/webapps/solr.war WEB-INF/lib/lucene-core-3.1.0.jar ; \
if test -d basic-lucene-libs; then rm -rf basic-lucene-libs; fi ; \
mv WEB-INF/lib/ basic-lucene-libs ; \
cp $(solrutils_dir)/schema.xml $(solrdir)/example/solr/conf/
cp $(solrutils_dir)/solrconfig.xml $(solrdir)/example/solr/conf/
cd $(solrutils_dir) && \
javac -classpath $(CLASSPATH) -d $(solrdir)/contrib @$(solrutils_dir)/java_sources.txt && \
cd $(solrdir)/contrib/ && \
jar -cf invenio-solr.jar org/invenio_software/solr/*class
update-v0.99.0-tables:
cat $(top_srcdir)/modules/miscutil/sql/tabcreate.sql | grep -v 'INSERT INTO upgrade' | ${prefix}/bin/dbexec
echo "DROP TABLE IF EXISTS oaiREPOSITORY;" | ${prefix}/bin/dbexec
echo "ALTER TABLE bibdoc ADD COLUMN more_info mediumblob NULL default NULL;" | ${prefix}/bin/dbexec
echo "ALTER TABLE schTASK ADD COLUMN priority tinyint(4) NOT NULL default 0;" | ${prefix}/bin/dbexec
echo "ALTER TABLE schTASK ADD KEY priority (priority);" | ${prefix}/bin/dbexec
echo "ALTER TABLE rnkCITATIONDATA DROP PRIMARY KEY;" | ${prefix}/bin/dbexec
echo "ALTER TABLE rnkCITATIONDATA ADD PRIMARY KEY (id);" | ${prefix}/bin/dbexec
echo "ALTER TABLE rnkCITATIONDATA CHANGE id id mediumint(8) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec
echo "ALTER TABLE rnkCITATIONDATA ADD UNIQUE KEY object_name (object_name);" | ${prefix}/bin/dbexec
echo "ALTER TABLE sbmPARAMETERS CHANGE value value text NOT NULL default '';" | ${prefix}/bin/dbexec
echo "ALTER TABLE sbmAPPROVAL ADD note text NOT NULL default '';" | ${prefix}/bin/dbexec
echo "ALTER TABLE hstDOCUMENT CHANGE docsize docsize bigint(15) unsigned NOT NULL;" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtACTIONHISTORY CHANGE client_host client_host int(10) unsigned default NULL;" | ${prefix}/bin/dbexec
update-v0.99.1-tables:
@echo "Nothing to do; table structure did not change between v0.99.1 and v0.99.2."
update-v0.99.2-tables:
@echo "Nothing to do; table structure did not change between v0.99.2 and v0.99.3."
update-v0.99.3-tables:
@echo "Nothing to do; table structure did not change between v0.99.3 and v0.99.4."
update-v0.99.4-tables:
@echo "Nothing to do; table structure did not change between v0.99.4 and v0.99.5."
update-v0.99.5-tables:
@echo "Nothing to do; table structure did not change between v0.99.5 and v0.99.6."
update-v0.99.6-tables:
@echo "Nothing to do; table structure did not change between v0.99.6 and v0.99.7."
-update-v0.99.7-tables: # from v0.99.7 to v1.0.0-rc0
+update-v0.99.7-tables:
+ @echo "Nothing to do; table structure did not change between v0.99.7 and v0.99.8."
+
+update-v0.99.8-tables: # from v0.99.8 to v1.0.0-rc0
echo "RENAME TABLE oaiARCHIVE TO oaiREPOSITORY;" | ${prefix}/bin/dbexec
cat $(top_srcdir)/modules/miscutil/sql/tabcreate.sql | grep -v 'INSERT INTO upgrade' | ${prefix}/bin/dbexec
echo "INSERT INTO knwKB (id,name,description,kbtype) SELECT id,name,description,'' FROM fmtKNOWLEDGEBASES;" | ${prefix}/bin/dbexec
echo "INSERT INTO knwKBRVAL (id,m_key,m_value,id_knwKB) SELECT id,m_key,m_value,id_fmtKNOWLEDGEBASES FROM fmtKNOWLEDGEBASEMAPPINGS;" | ${prefix}/bin/dbexec
echo "ALTER TABLE sbmPARAMETERS CHANGE name name varchar(40) NOT NULL default '';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bibdoc CHANGE docname docname varchar(250) COLLATE utf8_bin NOT NULL default 'file';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bibdoc CHANGE status status text NOT NULL default '';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bibdoc ADD COLUMN text_extraction_date datetime NOT NULL default '0000-00-00';" | ${prefix}/bin/dbexec
echo "ALTER TABLE collection DROP COLUMN restricted;" | ${prefix}/bin/dbexec
echo "ALTER TABLE schTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec
echo "ALTER TABLE hstTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bib85x DROP INDEX kv, ADD INDEX kv (value(100));" | ${prefix}/bin/dbexec
echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/HEP.rdf' WHERE name='HEP' AND location='';" | ${prefix}/bin/dbexec
echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/NASA-subjects.rdf' WHERE name='NASA-subjects' AND location='';" | ${prefix}/bin/dbexec
echo "UPDATE accACTION SET name='runoairepository', description='run oairepositoryupdater task' WHERE name='runoaiarchive';" | ${prefix}/bin/dbexec
echo "UPDATE accACTION SET name='cfgoaiharvest', description='configure OAI Harvest' WHERE name='cfgbibharvest';" | ${prefix}/bin/dbexec
echo "ALTER TABLE accARGUMENT CHANGE value value varchar(255);" | ${prefix}/bin/dbexec
echo "UPDATE accACTION SET allowedkeywords='doctype,act,categ' WHERE name='submit';" | ${prefix}/bin/dbexec
echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('categ','*');" | ${prefix}/bin/dbexec
echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='submit' and accARGUMENT.keyword='categ' and accARGUMENT.value='*';" | ${prefix}/bin/dbexec
echo "UPDATE accACTION SET allowedkeywords='name,with_editor_rights' WHERE name='cfgwebjournal';" | ${prefix}/bin/dbexec
echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('with_editor_rights','yes');" | ${prefix}/bin/dbexec
echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='cfgwebjournal' and accARGUMENT.keyword='with_editor_rights' and accARGUMENT.value='yes';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskEXTREC CHANGE id id int(15) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskEXTREC ADD external_id int(15) NOT NULL default '0';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskEXTREC ADD collection_id int(15) unsigned NOT NULL default '0';" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskEXTREC ADD original_url text;" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD status char(2) NOT NULL default 'ok';" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY status (status);" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Photos_to_Storage','Attach/edit the pictures uploaded with the \"create_photos_manager_interface()\" function');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Photos',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a photos upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Photos_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\n\r\nfrom invenio.websubmit_functions.ParamFile import ParamFromFile\r\nfrom invenio.websubmit_functions.Move_Photos_to_Storage import read_param_file, create_photos_manager_interface, get_session_id\r\n\r\n# Retrieve session id\r\ntry:\r\n # User info is defined only in MBI/MPI actions...\r\n session_id = get_session_id(None, uid, user_info) \r\nexcept:\r\n session_id = get_session_id(req, uid, {})\r\n\r\n# Retrieve context\r\nindir = curdir.split(\'/\')[-3]\r\ndoctype = curdir.split(\'/\')[-2]\r\naccess = curdir.split(\'/\')[-1]\r\n\r\n# Get the record ID, if any\r\nsysno = ParamFromFile(\"%s/%s\" % (curdir,\'SN\')).strip()\r\n\r\n\"\"\"\r\nModify below the configuration of the photos manager interface.\r\nNote: \'can_reorder_photos\' parameter is not yet fully taken into consideration\r\n\r\nDocumentation of the function is available by running:\r\necho -e \'from invenio.websubmit_functions.Move_Photos_to_Storage import create_photos_manager_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext += create_photos_manager_interface(sysno, session_id, uid,\r\n doctype, indir, curdir, access,\r\n can_delete_photos=True,\r\n can_reorder_photos=True,\r\n can_upload_photos=True,\r\n editor_width=700,\r\n editor_height=400,\r\n initial_slider_value=100,\r\n max_slider_value=200,\r\n min_slider_value=80)','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconsize');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nfrom invenio.websubmit_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.websubmit_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','forceFileRevision');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Upload_Files_Interface','Display generic interface to add/revise/delete files. To be used before function \"Move_Uploaded_Files_to_Storage\"');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Uploaded_Files_to_Storage','Attach files uploaded with \"Create_Upload_Files_Interface\"')" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','elementNameToDoctype');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createRelatedFormats');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','keepPreviousVersionDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Revised_Files_to_Storage','Revise files initially uploaded with \"Move_Files_to_Storage\"')" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxsize');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','minsize');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','doctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictions');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDeleteDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canReviseDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDescribeDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canCommentDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canKeepDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canAddFormatDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRestrictDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRenameDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canNameNewFiles');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','createRelatedFormats');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','keepDefault');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','showLinks');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','fileLabel');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','filenameLabel');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','descriptionLabel');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','commentLabel');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictionLabel');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','startDoc');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','endDoc');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','defaultFilenameDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxFilesDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','nblength');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_nb_length');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Get_Recid','record_search_pattern');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_FCKeditor_Files_to_Storage','Transfer files attached to the record with the FCKeditor');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_FCKeditor_Files_to_Storage','input_fields');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','layer');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','layer');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','switch_file');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','switch_file');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_restrictions');" | ${prefix}/bin/dbexec
echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_doctypes');" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD round_name varchar(255) NOT NULL default ''" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD restriction varchar(50) NOT NULL default ''" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT);" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskRECORDCOMMENT ADD in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskRECORDCOMMENT ADD KEY in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT);" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec
echo "ALTER TABLE cmtRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec
echo "ALTER TABLE bskRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec
echo -e 'from invenio.webcommentadminlib import migrate_comments_populate_threads_index;\
migrate_comments_populate_threads_index()' | $(PYTHON)
echo -e 'from invenio.access_control_firerole import repair_role_definitions;\
repair_role_definitions()' | $(PYTHON)
CLEANFILES = *~ *.pyc *.tmp
diff --git a/NEWS b/NEWS
index a2397db70..a5e695169 100644
--- a/NEWS
+++ b/NEWS
@@ -1,1713 +1,1868 @@
Invenio NEWS
============
Here is a short summary of the most notable changes in Invenio
releases. For more information about the current release, please
consult RELEASE-NOTES. For more information about changes, please
consult ChangeLog.
+Invenio v1.1.2 -- released 2013-08-19
+-------------------------------------
+
+ *) BibAuthorID: fix in name comparisons (#1313 #1314); improvements
+ and fixes; improvements, fixes and optimizations; UI and backend
+ improvements
+
+ *) BibCatalog: removal of print statement (#1337)
+
+ *) BibClassify: escape keywords in tag cloud and MARCXML
+
+ *) BibDocFile: better JS washing in web UI; display file upload
+ progress (#1020 #1021); display "Restricted" label correctly
+ (#1299); fix check-md5 with bibdocfsinfo cache (#1249); fix
+ error in calling
+ register_download (#1311); handling of exceptions in Md5Folder
+ (#1060); revert md5 property patch (#1249); support new magic
+ library (#1207)
+
+ *) BibEncode: minor fix in process_batch_job()
+
+ *) BibFormat: additional fulltext file display in HB (#1219); checks
+ for bibformat bin; fix CLI call to old PHP-based formatter; fixes
+ unit tests (#1320); fix for fulltext file format; fix snippets for
+ phrase queries (#1201); format_element initialisation fix; passing
+ of user_info for Excel format; replacement of CDS Invenio by
+ Invenio; setUp/tearDown in unit tests (#1319); skip hidden icons
+ in OpenGraph image tag
+
+ *) BibIndex: better wording for stemming in admin UI; replacement of
+ CDS Invenio by Invenio; synonym indexing speed up (#1484); use
+ human friendly index name (#1329)
+
+ *) BibKnowledge: /kb/export 500 error fix; optional memoisation of
+ KBR lookups (#1484)
+
+ *) BibMerge: delete cache file on submit
+
+ *) BibSched: bibupload max_priority check; bugfix for high-priority
+ monotasks; increases size of monitor columns;
+ parse_runtime_limit() fix (#1432); parse_runtime_limit() tests fix
+ (#1432)
+
+ *) BibUpload: FMT regression test case fix (#1152); indicators in
+ strong tags (#939)
+
+ *) CKEditor: updated to version 3.6.6
+
+ *) dateutils: strftime improvement (#1065); strptime for Python-2.4
+ compatibility
+
+ *) errorlib: hiding bibcatalog info in exception body
+
+ *) global: test suite nosification
+
+ *) htmlutils: fix single quote escaping; improve js string escaping;
+ MathJax 2.1 (#1050)
+
+ *) I18N: updates to Catalan and Spanish translations
+
+ *) installation: fix collectiondetailedrecordpagetabs (#1496); fix
+ for jQuery hotkeys add-on URL (#1507); fix for MathJax OS X
+ install issue (#1455); support for Apache-2.4 (#1552)
+
+ *) inveniocfg: tests runner file closure fix (#1327)
+
+ *) InvenioConnector: fix for CDS authentication; mechanize dependency
+
+ *) inveniogc: consider journal cache subdirs
+
+ *) memoiseutils: initial release
+
+ *) OAIHarvest: fix path for temporary authorlists; holding-pen UI
+ bugfixes (#1401)
+
+ *) OAIRepository: CFG_OAI_REPOSITORY_MARCXML_SIZE; no bibupload -n
+
+ *) RefExtract: replacement of CDS Invenio by Invenio
+
+ *) WebAccess: fix variable parsing in robot auth (#1456); IP-based
+ rules and offline user fix (#1233); replacement of CDS Invenio by
+ Invenio
+
+ *) WebApiKey: renames unit tests to regression tests (#1324)
+
+ *) WebAuthorProfile: fix XSS vulnerability
+
+ *) WebComment: escape review "title"
+
+ *) WebSearch: 410 HTTP code for deleted records; advanced search
+ notification if no hits; better cleaning of word patterns; fix
+ infinite synonym lookup cases (#804); handles "find feb 12"
+ (#948); nicer browsing of fuzzy indexes (#1348); respect default
+ `rg` in Advanced Search; SPIRES date math search fixes (#431
+ #948); SPIRES invalid date search fix (#1467); tweaks SPIRES
+ two-digit search; unit test disabling for CFG_CERN_SITE; unit test
+ update (#1326)
+
+ *) WebSession: fix for list of admin activities (#1444); login_method
+ changes; unit vs regression test suite cleanup
+
+ *) WebStat: use CFG_JOURNAL_TAG instead of 773/909C4 (#546)
+
+ *) WebSubmit: new websubmitadmin CLI (#1334); replacement of CDS Invenio by Invenio
+
+Invenio v1.0.5 -- released 2013-08-19
+-------------------------------------
+
+ *) BibClassify: escape keywords in tag cloud and MARCXML
+
+ *) BibDocFile: support new magic library
+
+ *) BibFormat: additional fulltext file display in HB; fix CLI call to
+ old PHP-based formatter; format_element initialisation fix
+
+ *) BibIndex: better wording for stemming in admin UI
+
+ *) BibKnowledge: /kb/export 500 error fix
+
+ *) BibUpload: FMT regression test case fix; indicators in strong tags
+
+ *) errorlib: hiding bibcatalog info in exception body
+
+ *) global: test suite nosification
+
+ *) installation: fix collectiondetailedrecordpagetabs; support for
+ Apache-2.4
+
+ *) WebAccess: IP-based rules and offline user fix; replacement of CDS
+ Invenio by Invenio
+
+ *) WebComment: escape review "title"
+
+ *) WebSearch: respect default `rg` in Advanced Search
+
+ *) WebSession: fix for list of admin activities; login_method changes
+
+ *) WebSubmit: new websubmitadmin CLI
+
+CDS Invenio v0.99.8 -- released 2013-08-19
+------------------------------------------
+
+ *) escape keywords in tag cloud and MARCXML (BibClassify)
+
+ *) fix CLI call to old PHP-based formatter; fix format_element
+ initialisation (BibFormat)
+
+ *) better wording for stemming in admin UI (BibIndex)
+
+ *) IP-based rules and offline user fix (WebAccess)
+
+ *) escape review "title" (WebComment)
+
+ *) fix collectiondetailedrecordpagetabs (installation)
+
Invenio v1.1.1 -- released 2012-12-21
-------------------------------------
*) BatchUploader: error reporting improvements
*) BibAuthorID: arXiv login upgrade; fix for small bug in claim
interface
*) BibConvert: fix bug with SPLITW function; target/source CLI flag
description fix
*) BibDocFile: better error report for unknown format; explicit
redirection to secure URL; fix for file upload in submissions
*) BibEdit: 'bibedit' CSS class addition to page body
*) BibFormat: clean Default_HTML_meta template; fix for js_quicktags
location; ISBN tag update for meta format; "ln" parameter in
bfe_record_url output; meta header output fix; relator code filter
in bfe_authors; fix for reformatting by record IDs
*) errorlib: register_exception improvements
*) global: login link using absolute URL redirection
*) installation: aidUSERINPUTLOG consistency upgrade; bigger
hstRECORD.marcxml size; fix for wrong name in tabcreate; inclusion
of JS quicktags in tarball; mark upgrade recipes as applied;
rephrase 1.1 upgrade recipe warning; safer upgrader bibsched
status parse; strip spaces in CFG list values
*) jQuery: tablesorter location standardisation
*) mailutils: authentication and TLS support
*) OAIRepository: Edit OAI Set page bug fix; fix for OAI set editing;
print_record() fixes
*) plotextractor: washing of captions and context
*) pluginutils: fix for failing bibformat test case
*) solrutils: addition of files into release tarball
*) WebAccess: admin interface usability improvement; guest unit tests
for firerole
*) WebAlert: new regression tests for alerts
*) WebComment: cleaner handling of non-reply comments
*) WebJournal: better language handling in widgets; CERN-specific
translation; explicit RSS icon dimensions; fix for
CFG_TMPSHAREDDIR; fix for retrieval of deleted articles; search
select form by name
*) WebSearch: fix for webcoll grid layout markup;
get_all_field_values() typo; next-hit/previous-hit numbering fix;
respect output format content-type; washing of 'as' argument
*) WebSession: fix for login-with-referer issue; fix for
merge_usera_into_userb()
*) WebStyle: dumb page loading fix Google Analytics documentation
update; memory leak fix in session handling; new /ping handler;
removal of excess language box call; req.is_https() fix;
*) WebSubmit: display login link on /submit page; fix for
Send_APP_Mail function; fix the approval URL for publiline
*) WebUser: fix for referer URL protocol
Invenio v1.0.4 -- released 2012-12-21
-------------------------------------
*) installation: inclusion of JS quicktags in tarball
*) bibdocfile: better error report for unknown format
*) WebAccess: admin interface usability improvement
Invenio v1.0.3 -- released 2012-12-19
-------------------------------------
*) BatchUploader: error reporting improvements
*) BibConvert: fix bug with SPLITW function; target/source CLI flag
description fix
*) BibEdit: 'bibedit' CSS class addition to page body
*) BibFormat: fix for js_quicktags location
*) jQuery: tablesorter location standardisation
*) WebComment: cleaner handling of non-reply comments
*) WebJournal: explicit RSS icon dimensions; fix for
CFG_TMPSHAREDDIR; fix for retrieval of deleted articles
*) WebSearch: external search pattern_list escape fix; respect output
format content-type; washing of 'as' argument
*) WebStyle: dumb page loading fix; Google Analytics documentation
update; memory leak fix in session handling; new /ping handler;
removal of excess language box call; req.is_https() fix
*) WebSubmit: fix for Send_APP_Mail function
*) WebUser: fix for referer URL protocol
CDS Invenio v0.99.7 -- released 2012-12-18
------------------------------------------
*) Google Analytics documentation update (WebStyle)
*) target/source CLI flag description fix (BibConvert)
Invenio v1.1.0 -- released 2012-10-21
-------------------------------------
*) BatchUploader: RESTful interface, runtime checks, TextMARC input,
job priority selection
*) BibAuthorID: new automatic author disambiguation and paper
claiming facility
*) BibCatalog: storage of ticket requestor, default RT user
*) BibCirculation: security fixes
*) BibClassify: UI improvements and refactoring
*) BibConvert: new BibTeX-to-MARCXML conversion, new oaidmf2marcxml
conversion, fixes for WORDS
*) BibDocFile: new filesystem cache for faster statistics, caseless
authorisation, disable HTTP range requests, improve file format
policies, and more
*) BibEdit: new options related to preview and printing, reference
curation, autocompletion, record and field template manager,
editing fields and subfields, per-collection authorisations, use
of knowledge bases, and more
*) BibEditMulti: new actions with conditions on fields, partial
matching for subfields, faster preview generation, and more
*) BibEncode: new audio and video media file processing tool, new
Video demo collection
*) BibFormat: new full-text snippet display facility, new
configuration for I18N caching, updates to EndNote, Excel, Dublin
Core and other formats, updates to formatting elements such as
DOI, author, updates to podcast output, updates to XSLT
processing, and more
*) OAIHarvest: new configurable workflow with reference extraction,
new author list extraction post process, upload priority, OpenAIRE
compliance, better handling of timeouts, and more
*) BibIndex: new full-text indexing via Solr, new support for author
ID indexing, better author tokeniser
*) BibKnowledge: dynamic knowledge bases for record editor, support
for JSON format
*) BibMatch: new matching of restricted collections
*) BibMerge: subfield order in slave record, confirmation pop up,
record selection bug fix
*) BibRank: new index term count ranking method, new support for flot
graphs, updates to citation graphs
*) BibRecord: new possibility to use lxml parser, sanity checks
*) BibSched: new motd-like facility for queue monitor, new
continuable error status for tasks, new tasklet framework, new
multi-node support, new monotask support, new support for task
sequences, improvements to scheduling algorithm
*) BibSort: new in-memory fast sorting tool using configurable
buckets
*) BibUpload: new automatic generation of MARC tag 005, new
`--callback-url' CLI parameter, fixes for appending existing
files, fixes for multiple 001 tags, and more
*) WebAccess: new external person ID support, performance
   improvements, robot manager UI improvements, fixes for firerole
   handling, and more
*) WebAlert: new alert description facility, fixes for restricted
collections
*) WebApiKey: new user-signed Web API key facility
*) WebAuthorProfile: new author pages with dynamic box layout
*) WebBasket: add to basket interface improvements, better XML
export, fixes for external records and other improvements
*) WebComment: new collapsible comment support, new permalink to
comments, loss prevention of unsubmitted comments, tidying up HTML
markup of comments, and more
*) WebJournal: new Open Graph markup, more customisable newsletter,
redirect to latest release of specific category, refresh chosen
collections on release, remove unnecessary encoding/decoding,
update weather widget for new APIs, and more
*) WebSearch: new index-time and search-time synonym support, new
Open Graph markup, new Google Scholar friendly metadata in page
header, new limit option for wildcard queries, new support for
access to merged records, new next/previous/back link support, new
`authorcount' indexing and searching, new relative date search
facility, clean OpenSearch support, improved speed, improvements
to SPIRES query syntax support, improvements to self-cite math,
primary collection guessing, other numerous fixes
*) WebSession: new useful guest sessions, reintroduces configurable
IP checking, enforcement of nickname refresh, several other fixes
*) WebStat: new login statistics, new custom query summary, error
analyser, custom event improvements
*) WebStyle: new display restriction flag for restricted records, new
initial right-to-left language support, authenticated user and
HTTPS support, IP check for proxy configurations, layout updates
and fixes for MSIE, and more
*) WebSubmit: new initial support for converting to PDF/X, new
embargo support, better LibreOffice compatibility, better async
file upload, enhancements for Link_Records, support for hiding
HIDDEN files in document manager, configurable initial value for
counter, make use of BibSched task sequences, and more
*) installation: updates to jQuery, CKEditor, unoconv, and other
prerequisites
*) dbdump: new compression support, reworked error handling
*) dbquery: new possibility to query DB slave nodes, new dict-like
output, fix for MySQL 5.5.3 and higher versions
*) errorlib: stack analysis improvements, outline style improvements
for invenio.err
*) htmlutils: improvements to HTML markup removal, HTML tidying
*) I18N: new Arabic and Lithuanian translations, updates to Catalan,
Czech, French, German, Greek, Italian, Russian, Slovak, Spanish
translations
*) intbitset: new performance improvements, new get item support, new
pickle support, several memory leak fixes
*) inveniocfg: new automated Invenio Upgrader tool
*) InvenioConnector: new search with retries, improved search
parameters, improved local site check, use of Invenio user agent
*) jsonutils: new JSON utility library
*) mailutils: possibility to specify Reply-To header, fixes to
multipart
*) plotextractor: better TeX detection, better PDF harvesting from
arXiv, configurable sleep timer
*) pluginutils: new create_enhanced_plugin_builder API, external
plugin loading
*) RefExtract: new daemon operation mode, new DOI recognition, better
author recognition, new author knowledge base
*) remote debugger: new remote debugging support
*) sequtils: new sequence generator tool
*) solrutils: new support for full-text query dispatching to Solr
*) testutils: new Selenium web test framework
*) textutils: updates to string-to-ascii functions, LaTeX symbols to
Unicode
*) urlutils: fix for redirect_to_url
*) xmlmarclint: fix for error report formatting
*) ... and other numerous smaller fixes and improvements
Invenio v1.0.2 -- released 2012-10-19
-------------------------------------
*) BibConvert: fix for static files in admin guide
*) BibEdit: regression test case fix
*) BibFormat: fix call to bfe_primary_report_number; revert fix for
format validation report
*) BibHarvest: OAI harvesting via HTTP proxy
*) BibRank: begin_date initialisation in del_recids(); INSERT DELAYED
INTO rnkPAGEVIEWS; user-friendlier message for similar docs
*) BibUpload: clarify correct/replace mode help
*) WebJournal: catch ValueError when reading cache; use
CFG_TMPSHAREDDIR in admin UI
*) WebSearch: allow webcoll to query hidden tags; external collection
search fix; external search XSS vulnerability fix; fix for
parentheses inside quotes; get_collection_reclist() fix; more uses
of `rg` configurable default; 'verbose' mode available to admins
only; XSS and verbose improvements
*) WebSession: fix possibly undefined variables; prevent nickname
modification
*) WebStyle: workaround IE bug with cache and HTTPS
*) WebSubmit: configurable Document File Manager; fix JS check for
mandatory fields; unoconv calling fix
*) bibdocfile: guess_format_from_url() improvement;
guess_format_from_url() improvements; INSERT DELAYED INTO
rnkDOWNLOADS
*) global: removal of psyco
*) I18N: Spanish and Catalan updates to Search Tips; updates to
German translation
*) installation: fix for jQuery UI custom; fix md5sum example
arguments; new index on session.session_expiry
*) intbitset: fix memory leak
*) inveniogc: tmp directory removal improvements
*) urlutils: MS Office redirection workaround
CDS Invenio v0.99.6 -- released 2012-10-18
------------------------------------------
*) improved XSS safety in external collection searching (WebSearch)
*) verbose level in the search results pages is now available only to
admins, preventing potential restricted record ID disclosure even
though record content would remain restricted (WebSearch)
Invenio v1.0.1 -- released 2012-06-28
-------------------------------------
*) BibFormat: fix format validation report; fix opensearch prefix
exclusion in RSS; fix retrieval of collection identifier
*) BibIndex: new unit tests for the Greek stemmer
*) BibSched: improve low level submission arg parsing; set ERROR
status when wrong params; task can stop immediately when sleeping
*) BibSword: remove dangling documentation
*) BibUpload: fix setting restriction in -a/-ir modes
*) WebAlert: simplify HTML markup
*) WebComment: only logged-in users to use report abuse
*) WebJournal: hide deleted records
*) WebSearch: adapt test cases for citation summary; fix collection
order on the search page; look at access control when webcolling;
sorting in citesummary breakdown links
*) WebSession: simplify HTML markup
*) WebSubmit: capitalise doctypes in Doc File Manager; check
authorizations in endaction; check for problems when archiving;
ensure unique tmp file name for upload; fix email formatting; fix
Move_to_Done function; remove 8564_ field from demo templates;
skip file upload if necessary; update CERN-specific config
*) bibdocfile: BibRecDocs recID argument type check
*) data cacher: deletes cache before refilling it
*) dbquery: fix dbexec CLI WRT max allowed packet
*) I18N: updates to Greek translation
*) installation: fix circular install-jquery-plugins; fix demo user
initialisation; fix jQuery tablesorter download URL; fix jQuery
uploadify download URL; more info about max_allowed_packet; remove
unneeded rxp binary package
Invenio v1.0.0 -- released 2012-02-29
-------------------------------------
*) BatchUploader: fix retrieval of recs from extoaiid
*) BibCirculation: fix regexp for dictionary checking; security check
before eval
*) BibConvert: fix UP and DOWN for UTF-8 strings
*) bibdocfile: add missing normalize_format() calls;
check_bibdoc_authorization caseless; fix append WRT
description/restriction; fix cli_set_batch function; fix
documentation WRT --with-version; fix handling of embargo firerole
rule; fix parsing of complex subformats
*) BibEdit: fix crash in Ajax request; fix undefined dictionary key
*) BibFormat: better escape BFE in admin test UI; do not exit if no
XSLT processor found; fix regression test; fix URL to ejournal
resolver; fix XSLT formatting of MARCXML snippets; removes 'No
fulltext' message; special handling of INSPIRE-PUBLIC type; use
default namespace in XSL
*) BibHarvest: check for empty resumptionToken; fix MARCXML creation
in OAI updater; optional JSON dependency
*) BibIndex: fix author:Campbell-Wilson word query; fix
double-stemming upon indexing; fix Porter stemmer in multithread;
Greek stemmer improvements
*) BibKnowledge: make XML/XSLT libs optional
*) BibRank: CERN hack to inactivate similarity lists; fix citation
indexer time stamp updating; fix citation indexing of deleted
records; fix citedby/refersto for infinite sets; fix empty
citation data cacher; fix incremental citation indexer leaks; make
numpy optional; minimum x-axis in citation history graphs; run
citation indexer after word indexer
*) BibRecord: fix for record_get_field_instances()
*) BibSched: fix guess_apache_process_user_from_ps; use larger
   timeouts for launching tasks
*) BibUpload: FFT regression tests not to use CDS
*) htmlutils: fix FCKeditor upload URLs
*) installation: add note about optional hashlib; change table TYPE
to ENGINE in SQL; fix 'install-mathjax-plugin'; fix issue with
FCKeditor; fix 'make install-jquery-plugins'; fix output message
cosmetics; new 'make install-ckeditor-plugin'; re-enable WSGI
pre-loading
*) intbitset: fix never ending loop in __repr__; fix several memory
leaks
*) inveniocfg: fix resetting ranking method names
*) inveniogc: new CLI options check/optimise tables
*) kwalitee: grep-like output and exit status changes; use
`--check-some` as default CLI option
*) mailutils: remove unnecessary 'multipart/related'
*) plotextractor: fix INSPIRE unit test
*) textmarc2xmlmarc: fix handling of BOM
*) urlutils: new Indico request generator helper
*) WebAccess: fix Access policy page; fix FireRole handling integer
uid; fix retrieving emails from firerole
*) WebAlert: fix the display of records in alerts
*) WebBasket: fix missing return statement; fix number of items in
public baskets
*) WebComment: CERN-specific hack for ATLAS comments; fix discussion
display in bfe_comments; fix washing of email to admin; improve
sanity checks
*) WebHelp: HOWTO MARC document update
*) WebJournal: fix seminar widget encoding issue; fix seminar widget
for new Indico APIs; update weather widget for new APIs
*) WebSearch: add refersto:/a b c/ example to guide; CERN-specific
hack for journal sorting; CERN-specific hack for latest additions;
fix case-insensitive collection search; fix CDSIndico external
search; fix collection translation in admin UI; fix
get_fieldvalues() when recid is str; fix
get_index_id_from_field(); fix structured regexp query parsing;
fix symbol name typo in loop checking; parenthesised collection
definitions; remove accent-search warning in guide; remove Report
for INSPIRE author pages; replace CDS Indico by Indico; updates
some output phrases
*) WebSession: fix crash when no admin user exists
*) WebStyle: better service failure message; fix implementation of
req.get_hostname; fluid width of the menu; pre-load citation
dictionaries for web
*) WebSubmit: avoid printing empty doctype section;
check_user_can_view_record in publiline; fix filename bug in
document manager; fix handling of uploaded files; fix
record_search_pattern in DEMOJRN
*) xmlmarclint: 'no valid record detected' error
*) I18N: updates to Catalan, Czech, French, German, Greek, Italian,
Slovak, and Spanish translations
*) Note: for a complete list of new features in Invenio v1.0 release
series over Invenio v0.99 release series, please see:
CDS Invenio v0.99.5 -- released 2012-02-21
------------------------------------------
*) improved sanity checks when reporting, voting, or replying to a
comment, or when accessing comment attachments, preventing URL
mangling attempts (WebComment)
CDS Invenio v0.99.4 -- released 2011-12-19
------------------------------------------
*) fixed double stemming during indexing (BibIndex)
*) fixed collection translation in admin UI (WebSearch)
*) fixed UP and DOWN functions for UTF-8 strings (BibConvert)
Invenio v1.0.0-rc0 -- released 2010-12-21
-----------------------------------------
*) CDS Invenio becomes Invenio as of this release
*) new facility of hosted collections; support for external records
in search collections, user alerts and baskets (WebSearch,
WebAlert, WebBasket)
*) support for nested parentheses in search query syntax (WebSearch)
*) new refersto/citedby search operators for second-order searches in
citation map (BibRank, WebSearch)
*) numerous improvements to SPIRES query syntax parser (WebSearch)
*) enhancement to search results summaries, e.g. co-author lists on
author pages, e.g. h-index (WebSearch)
*) new support for unAPI, Zotero, OpenSearch, AWS (WebSearch)
*) new phrase and word-pair indexes (BibIndex)
*) new fuzzy author name matching mode (BibIndex)
*) new time-dependent citation ranking family of methods (BibRank)
*) full-text search now shows context snippets (BibFormat)
*) improvements to the basket UI, basket export facility (WebBasket)
*) new support for FCKeditor in submissions and user comments,
possibility to attach files (WebComment, WebSubmit)
*) commenting facility enhanced with rounds and threads (WebComment)
*) new facility to moderate user comments (WebComment)
*) enhanced CLI tool for document file management bringing new
options such as hidden file flag (WebSubmit)
*) numerous improvements to the submission system, e.g. asynchronous
JavaScript upload support, derived document formats, icon
creation, support for automatic conversion of OpenOffice
documents, PDF/A, OCR (WebSubmit)
*) new full-text file metadata reader/writer tool (WebSubmit)
*) new experimental SWORD protocol client application (BibSword)
*) complete rewrite of the record editor using Ajax technology for
faster user operation, with new features such as field templates,
cloning, copy/paste, undo/redo, auto-completion, etc (BibEdit)
*) new multi-record editor to alter many records in one go (BibEdit)
*) new Ajax-based record differ and merger (BibMerge)
*) new fuzzy record matching mode, with possibility to match records
against remote Invenio installations (BibMatch)
*) new circulation and holdings module (BibCirculation)
*) new facility for matching provenance information when uploading
records (BibUpload)
*) new possibility of uploading incoming changes into holding pen
(BibUpload)
*) new batch uploader facility to support uploading of metadata files
and of full-text files either in CLI or over web (BibUpload)
*) new record exporting module supporting e.g. Sitemap and Google
Scholar export methods (BibExport)
*) improvements to the keyword classifier, e.g. author and core
keywords (BibClassify)
*) new facility for external robot-like login method (WebAccess)
*) numerous improvements to the journal creation facility, new
journal `Atlantis Times' demo journal (WebJournal)
*) refactored and improved OAI exporter and harvester (BibHarvest)
*) new taxonomy-based and dynamic-query knowledge base types
(BibKnowledge)
*) possibility to switch on/off user features such as alerts and
baskets based on RBAC rules (WebAccess and other modules)
*) various improvements to task scheduler, for example better
communication with tasks, possibility to run certain bibsched
tasks within given time limit, etc (BibSched)
*) new database dumper for backup purposes (MiscUtil)
*) new plotextractor library for extracting plots from compuscripts,
   new figure caption index and the Plots tab (MiscUtil, BibIndex,
   WebSearch)
*) enhanced reference extractor, e.g. support for DOI, for author name
   recognition (MiscUtil)
*) new register emergency feature e.g. to alert admins by SMS in case
the task queue stops (MiscUtil)
*) infrastructure move from mod_python to mod_wsgi, support for
mod_xsendfile (WebStyle and many modules)
*) infrastructure move from jsMath to MathJax (MiscUtil)
*) some notable backward-incompatible changes: removed authentication
methods related to Apache user and group files, changed BibFormat
element's API (BibFormat, many modules)
*) new translations (Afrikaans, Galician, Georgian, Romanian,
Kinyarwanda) plus many translation updates
*) other numerous improvements and bug fixes done in about 1600
commits over Invenio v0.99 series
CDS Invenio v0.99.3 -- released 2010-12-13
------------------------------------------
*) fixed issues in the harvesting daemon when harvesting from more
than one OAI repository (BibHarvest)
*) fixed failure in formatting engine when dealing with
not-yet-existing records (BibFormat)
*) fixed traversal of final URL parts in the URL dispatcher
(WebStyle)
*) improved bibdocfile URL recognition upon upload of MARC files
(BibUpload)
*) fixed bug in admin interface for adding authorizations (WebAccess)
*) keyword extractor is now compatible with rdflib releases older
than 2.3.2 (BibClassify)
*) output of `bibsched status' now shows the queue mode status as
AUTOMATIC or MANUAL to help queue monitoring (BibSched)
CDS Invenio v0.99.2 -- released 2010-10-20
------------------------------------------
*) stricter checking of access to restricted records: in order to
view a restricted record, users are now required to have
authorizations to access all restricted collections the given
record may belong to (WebSearch)
*) strict checking of user query history when setting up email
notification alert, preventing URL mangling attempts (WebAlert)
*) fixed possible Unix signal conflicts for tasks performing I/O
operations or running external processes, relevant notably to
full-text indexing of remote files (BibSched)
*) fixed full-text indexing and improved handling of files of
`unexpected' extensions (BibIndex, WebSubmit)
*) streaming of files of `unknown' MIME type now defaults to
application/octet-stream (WebSubmit)
*) fixed addition of new MARC fields in the record editor (BibEdit)
*) fixed issues in full-text file attachment via MARC (BibUpload)
*) fixed authaction CLI client (WebAccess)
*) ... plus other minor fixes and improvements
CDS Invenio v0.99.1 -- released 2008-07-10
------------------------------------------
*) search engine syntax now supports parentheses (WebSearch)
*) search engine syntax now supports SPIRES query language
(WebSearch)
*) strict respect for per-collection sort options on the search
results pages (WebSearch)
*) improved parsing of search query with respect to non-existing
field terms (WebSearch)
*) fixed "any collection" switch on the search results page
(WebSearch)
*) added possibility for progressive display of detailed record page
tabs (WebSearch)
*) added support for multi-page RSS output (WebSearch)
*) new search engine summarizer module with the cite summary output
format (WebSearch, BibRank)
*) "cited by" links are now generated only when needed (WebSearch)
*) new experimental comprehensive author page (WebSearch)
*) stemming for many indexes is now enabled by default (BibIndex)
*) new intelligent journal index (BibIndex)
*) new logging of missing citations (BibRank)
*) citation indexer and searcher improvements and caching (BibRank)
*) new low-level task submission facility (BibSched)
*) new options in bibsched task monitor: view task options, log and
error files; prune task to a history table; extended status
reporting; failed tasks now need acknowledgement in order to
restart the queue (BibSched)
*) safer handling of task sleeping and waking up (BibSched)
*) new experimental support for task priorities and concurrent task
execution (BibSched)
*) improved user-configured browser language matching (MiscUtil)
*) new default behaviour not differentiating between guest users;
this removes a need to keep sessions/uids for guests and robots
(WebSession)
*) optimized sessions and collecting external user information (WebSession)
*) improved logging conflicts for external vs internal users
(WebAccess)
*) improved Single Sign-On session preservation (WebAccess)
*) new 'become user' debugging facility for admins (WebAccess)
*) new bibdocfile CLI tool to manipulate full-text files archive
(WebSubmit)
*) optimized redirection of old URLs (WebSubmit)
*) new icon creation tool in the submission input chain (WebSubmit)
*) improved full-text file migration tool (WebSubmit)
*) improved stamping of full-text files (WebSubmit)
*) new approval-related end-submission functions (WebSubmit)
*) comments and descriptions of full-text files are now kept also in
bibdoc tables, not only in MARC; they are synchronized during
bibupload (WebSubmit, BibUpload)
*) fixed navigation in public baskets (WebBasket)
*) added detailed record page link to basket records (WebBasket)
*) new removal of HTML markup in alert notification emails (WebAlert)
*) improved OAI harvester logging and handling (BibHarvest)
*) improved error checking (BibConvert)
*) improvements to the record editing tool: subfield order change,
repetitive subfields; improved record locking features;
configurable per-collection curators (BibEdit)
*) fully refactored WebJournal module (WebJournal)
*) new RefWorks output format, thanks to Theodoros Theodoropoulos
(BibFormat)
*) fixed keyword detection tool's output; deactivated taxonomy
compilation (BibClassify)
*) new /stats URL for administrators (WebStat)
*) better filtering of unused translations (WebStyle)
*) updated French, Italian, Norwegian and Swedish translations;
updated Japanese translation (thanks to Makiko Matsumoto and Takao
Ishigaki); updated Greek translation (thanks to Theodoros
Theodoropoulos); new Hungarian translation (thanks to Eva Papp)
*) ... plus many other minor bug fixes and improvements
CDS Invenio v0.99.0 -- released 2008-03-27
------------------------------------------
*) new Invenio configuration language, new inveniocfg configuration
tool permitting more runtime changes and enabling separate local
customizations (MiscUtil)
*) phased out WML dependency everywhere (all modules)
*) new common RSS cache implementation (WebSearch)
*) improved access control to the detailed record pages (WebSearch)
*) when searching non-existing collections, do not revert to
searching in public Home anymore (WebSearch)
*) strict calculation of number of hits per multiple collections
(WebSearch)
*) propagate properly language environment in browse pages, thanks to
Ferran Jorba (WebSearch)
*) search results sorting made accentless, thanks to Ferran Jorba
(WebSearch)
*) new OpenURL interface (WebSearch)
*) added new search engine API argument to limit searches to record
creation/modification dates and times instead of hitherto creation
dates only (WebSearch)
*) do not allow HTTP POST method for searches to prevent hidden
mining (WebSearch)
*) added alert and RSS teaser for search engine queries (WebSearch)
*) new optimized index structure for fast integer bit vector
operations, leading to significant indexing time improvements
(MiscUtil, BibIndex, WebSearch)
*) new tab-based organisation of detailed record pages, with new URL
schema (/record/1/usage) and related CSS changes (BibFormat,
MiscUtil, WebComment, WebSearch, WebStyle, WebSubmit)
*) phased out old PHP based code; migration to Python-based output
formats recommended (BibFormat, WebSubmit)
*) new configurability to show/hide specific output formats for
specific collections (BibFormat, WebSearch)
*) new configurability to have specific stemming settings for
specific indexes (BibIndex, WebSearch)
*) optional removal of LaTeX markup for indexer (BibIndex, WebSearch)
*) performance optimization for webcoll and optional arguments to
refresh only parts of collection cache (WebSearch)
*) optional verbosity argument propagation to the output formatter
(BibFormat, WebSearch)
*) new convenient reindex option to the indexer (BibIndex)
*) fixed problem with indexing of some lengthy UTF-8 accented names,
thanks to Theodoros Theodoropoulos for reporting the problem
(BibIndex)
*) fixed full-text indexing of HTML pages (BibIndex)
*) new Stemmer module dependency, fixes issues on 64-bit systems
(BibIndex)
*) fixed download history graph display (BibRank)
*) improved citation ranking and history graphs, introduced
self-citation distinction, added new demo records (BibRank)
*) fixed range redefinition and output message printing problems in
the ranking indexer, thanks to Mike Marino (BibRank)
*) new XSLT output formatter support; phased out old BFX formats
(BibFormat)
*) I18N output messages are now translated in the output formatter
templates (BibFormat)
*) formats fixed to allow multiple author affiliations (BibFormat)
*) improved speed of the record output reformatter in case of large
sets (BibFormat)
*) support for displaying LaTeX formulas via JavaScript (BibFormat)
*) new and improved output formatter elements (BibFormat)
*) new escaping modes for format elements (BibFormat)
*) output format template editor cache and element dependency
checker improvements (BibFormat)
*) output formatter speed improvements in PHP-compatible mode
(BibFormat)
*) new demo submission configuration and approval workflow examples
(WebSubmit)
*) new submission full-text file stamper utility (WebSubmit)
*) new submission icon-creation utility (WebSubmit)
*) separated submission engine and database layer (WebSubmit)
*) submission functions can now access user information (WebSubmit)
*) implemented support for restricted icons (WebSubmit, WebAccess)
*) new full-text file URL and cleaner storage facility; requires file
names to be unique within a given record (WebSearch, WebSubmit)
*) experimental release of the complex approval and refereeing
workflow (WebSubmit)
*) new end-submission functions to move files to storage space
(WebSubmit)
*) added support for MD5 checking of full-text files (WebSubmit)
*) improved behaviour of the submission system with respect to the
browser "back" button (WebSubmit)
*) removed support for submission "cookies" (WebSubmit)
*) flexible report number generation during submission (WebSubmit)
*) added support for optional filtering step in the OAI harvesting
chain (BibHarvest)
*) new text-oriented converter functions IFDEFP, JOINMULTILINES
(BibConvert)
*) selective harvesting improvements, sets, non-standard responses,
safer resumption token handling (BibHarvest)
*) OAI archive configuration improvements: collections retrieval,
multiple set definitions, new clean mode, timezones, and more
(BibHarvest)
*) OAI gateway improvements: XSLT used to produce configurable output
(BibHarvest)
*) added support for "strong tags" that can resist metadata replace
mode (BibUpload)
*) added external OAI ID tag support to the uploader (BibUpload)
*) added support for full-text file transfer during uploading
(BibUpload)
*) preserving full history of all MARCXML versions of a record
(BibEdit, BibUpload)
*) XMLMARC to TextMarc improvements: empty indicators and more
(BibEdit)
*) numerous reference extraction tool improvements: year handling,
LaTeX handling, URLs, journal titles, output methods, and more
(BibEdit)
*) new classification daemon (BibClassify)
*) classification taxonomy caching resulting in speed optimization
(BibClassify)
*) new possibility to define more than one keyword taxonomy per
collection (BibClassify)
*) fixed non-standalone keyword detection, thanks to Annette Holtkamp
(BibClassify)
*) new embedded page generation profiler (WebStyle)
*) new /help pages layout and webdoc formatting tool (WebStyle)
*) new custom style template verification tool (WebStyle)
*) added support for the XML page() output format, suitable for AJAX
interfaces (WebStyle)
*) introduction of navigation menus (WebStyle)
*) general move from HTML to XHTML markup (all modules)
*) fixed alert deletion tool vulnerability (WebAlert)
*) do not advertise baskets/alerts much for guest users; show only
the login link (WebSession)
*) password reset interface improvements (WebSession)
*) new permanent "remember login" mechanism (WebSession, WebAccess)
*) local user passwords are now encrypted (WebSession, WebAccess)
*) new LDAP external authentication plugin (WebAccess)
*) new password reset mechanism using new secure mail cookies and
temporary role membership facilities (WebAccess, WebSession)
*) added support for Single Sign-On Shibboleth based authentication
method (WebAccess)
*) new firewall-like based role definition language, new demo
examples (WebAccess)
*) external authentication and groups improvements: nicknames,
account switching, and more (WebSession, WebAccess)
*) task log viewer integrated in the task monitor (BibSched)
*) new journal creation module (WebJournal)
*) new generic statistic gathering and display facility (WebStat)
*) deployed new common email sending facility (MiscUtil, WebAlert,
WebComment, WebSession, WebSubmit)
*) dropped support for MySQL-4.0, permitting to use clean and strict
UTF-8 storage methods; upgrade of MySQLdb to at least 1.2.1_p2
required (MiscUtil)
*) uncaught exceptions are now being sent by email to the
administrator (MiscUtil, WebStyle)
*) new general garbage collector with a possibility to run via the
task scheduler and a possibility to clean unreferenced
bibliographic values (MiscUtil)
*) new generic SQL and data cacher (MiscUtil)
*) new HTML page validator plugin (MiscUtil)
*) new web test suite running in a real browser (MiscUtil)
*) improved code kwalitee checker (MiscUtil)
*) translation updates: Spanish and Catalan (thanks to Ferran Jorba),
Japanese (Toru Tsuboyama), German (Benedikt Koeppel), Polish
(Zbigniew Szklarz and Zbigniew Leonowicz), Greek (Theodoros
Theodoropoulos), Russian (Yana Osborne), Swedish, Italian, French
*) new translations: Chinese traditional and Chinese simplified
(thanks to Kam-ming Ku)
*) ... plus many other minor bug fixes and improvements
CDS Invenio v0.92.1 -- released 2007-02-20
------------------------------------------
*) new support for external authentication systems (WebSession,
WebAccess)
*) new support for external user groups (WebSession)
*) new experimental version of the reference extraction program
(BibEdit)
*) new optional Greek stopwords list, thanks to Theodoros
   Theodoropoulos (BibIndex)
*) new Get_Recid submission function (WebSubmit)
*) new config variable governing the display of the download history
graph (BibRank)
*) started deployment of user preferences (WebSession, WebSearch)
*) split presentation style for "Narrow search", "Focus on" and
"Search also" search interface boxes (WebSearch, WebStyle)
*) updated CERN Indico and KEK external collection searching facility
(WebSearch)
*) fixed search interface portalbox and collection definition
escaping behaviour (WebSearch Admin)
*) fixed problems with external system number and OAI ID matching
(BibUpload)
*) fixed problem with case matching behaviour (BibUpload)
*) fixed problems with basket record display and basket topic change
(WebBasket)
*) fixed output format template attribution behaviour (BibFormat)
*) improved language context propagation in output formats
(BibFormat)
*) improved output format treatment of HTML-aware fields (BibFormat)
*) improved BibFormat migration kit (BibFormat)
*) improved speed and eliminated set duplication of the OAI
repository gateway (BibHarvest)
*) fixed resumption token handling (BibHarvest)
*) improved record editing interface (BibEdit)
*) fixed problem with empty fields treatment (BibConvert)
*) updated Report_Number_Generation submission function to be able to
easily generate report numbers from any submission information
(WebSubmit)
*) fixed problem with submission field value escaping (WebSubmit)
*) fixed problem with submission collection ordering (WebSubmit)
*) fixed BibSched task signal handling inconsistency (BibSched)
*) fixed TEXT versus BLOB database problems for some tables/columns
*) minor updates to the HOWTO Migrate guide and several admin guides
(WebHelp, BibIndex, BibFormat)
*) minor bugfixes to several modules; see ChangeLog for details and
credits
CDS Invenio v0.92.0 -- released 2006-12-22
------------------------------------------
*) previously experimental output formatter in Python improved and
made default (BibFormat)
*) previously experimental new submission admin interface in Python
improved and made default (WebSubmit)
*) new XML-oriented output formatting mode (BibFormat)
*) new export-oriented output formats: EndNote, NLM (BibFormat)
*) RSS 2.0 latest additions feed service (WebSearch, BibFormat)
*) new XML-oriented metadata converter mode (BibConvert)
*) new metadata uploader in Python (BibUpload)
*) new integrated parallel external collection searching (WebSearch)
*) improved document classifier: composite keywords, wildcards, cloud
output (BibClassify)
*) improved UTF-8 fulltext indexing (BibIndex)
*) improved external login authentication subsystem (WebAccess)
*) added possibility to order submission categories (WebSubmit)
*) improved handling of cached search interface page formats,
preferential sort pattern functionality, international collection
names (WebSearch)
*) improved behaviour of OAI harvester: sets, deleted records,
harvested metadata transformation (BibHarvest)
*) improved MARCXML schema compatibility concerning indicators;
updates to the HTML MARC output format (BibEdit, BibUpload,
BibFormat, and other modules)
*) multiple minor bugs fixed thanks to the wider deployment of the
regression test suite (all modules)
*) new translation (Croatian) and several translation updates
(Catalan, Bulgarian, French, Greek, Spanish); thanks to Ferran
Jorba, Beatriu Piera, Alen Vodopijevec, Jasna Marković, Theodoros
Theodoropoulos, and Nikolay Dyankov (see also THANKS file)
*) removed dependency on PHP; not needed anymore
*) full compatibility with MySQL 4.1 and 5.0; upgrade from MySQL 4.0
now recommended
*) full compatibility with FreeBSD and Mac OS X
CDS Invenio v0.90.1 -- released 2006-07-23
------------------------------------------
*) output messages improved and enhanced to become more easily
translatable in various languages (all modules)
*) new translation (Bulgarian) and several updated translations
(Greek, French, Russian, Slovak)
*) respect language choice in various web application links
(WebAlert, WebBasket, WebComment, WebSession, WebSubmit)
*) fixed problem with commenting rights in a group-shared basket that
is also a public basket with lesser rights (WebBasket)
*) guest users are now forbidden to share baskets (WebBasket)
*) fixed guest user garbage collection, adapted to the new baskets
schema (WebSession)
*) added possibility to reject group membership requests; sending
informational messages when users are approved/refused by group
administrators (WebSession)
*) experimental release of the new BibFormat in Python (BibFormat)
*) started massive deployment of the regression test suite, checking
availability of all web interface pages (BibEdit, BibFormat,
BibHarvest, BibIndex, BibRank, MiscUtil, WebAccess, WebBasket,
WebComment, WebMessage, WebSearch, WebSession, WebSubmit)
*) updated developer documentation (I18N output messages policy, test
suite policy, coding style)
CDS Invenio v0.90.0 -- released 2006-06-30
------------------------------------------
*) formerly known as CDSware; the application name change clarifies
the relationship with respect to the CDS Software Consortium
producing two flagship applications (CDS Indico and Invenio)
*) version number increased to v0.90 in the anticipation of the
forthcoming v1.0 release after all the major codebase changes are
now over
*) new possibility to define user groups (WebGroup)
*) new personal basket organization in topics (WebBasket)
*) new basket sharing among user groups (WebBasket)
*) new open peer reviewing and commenting on documents (WebComment)
*) new user and group web messaging system (WebMessage)
*) new ontology-based document classification system (BibClassify)
*) new WebSubmit Admin (WebSubmit)
*) new record editing web interface (BibEdit)
*) new record matching tool (BibMatch)
*) new OAI repository administration tool (BibHarvest)
*) new OAI periodical harvesting tool (BibHarvest)
*) new web layout templating system (WebStyle)
*) new clean URL schema (e.g. /collection/Theses, /record/1234)
(WebStyle)
*) new BibTeX output format support (BibFormat)
*) new possibility of secure HTTPS authentication while keeping the
rest of the site non-HTTPS (WebSession)
*) new centralized error library (MiscUtil)
*) new gettext-based international translations, with two new beta
translations (Japanese, Polish)
*) new regression testing suite framework (MiscUtil)
*) new all prerequisites are now apt-gettable for Debian "Sarge"
GNU/Linux
*) new full support for Mac OS X
*) ... plus many fixes and changes worth one year of development
CDSware v0.7.1 -- released 2005-05-04
-------------------------------------
*) important bugfix for bibconvert's ``source data in a directory''
mode, as invoked by the web submission system (BibConvert)
*) minor bugfix in the search engine, thanks to Frederic Gobry
(WebSearch)
*) minor bugfix in the WebSearch Admin interface (WebSearch)
*) automatic linking to Google Print in the ``Haven't found what you
were looking for...'' page box (WebSearch)
*) BibFormat Admin Guide cleaned, thanks to Ferran Jorba
*) new Catalan translation, thanks to Ferran Jorba
*) updated Greek and Portuguese translations, thanks to Theodoros
Theodoropoulos and Flávio C. Coelho
*) updated Spanish translation
CDSware v0.7.0 -- released 2005-04-06
-------------------------------------
*) experimental release of the refextract program for automatic
reference extraction from PDF fulltext files (BibEdit)
*) experimental release of the citation and download ranking tools
(BibRank)
*) new module for gathering usage statistics out of Apache log files
(WebStat)
*) new similar-records-navigation tool exploring end-user viewing
habits: "people who viewed this page also viewed" (WebSearch,
BibRank)
*) OAI gateway validated against OAI Repository Explorer (BibHarvest)
*) fixed "records modified since" option for the indexer (BibIndex)
*) collection cache update is done only when the cache is not up to
date (WebSearch) [closing #WebSearch-016]
*) cleanup of user login mechanism (WebSession, WebAccess)
*) fixed uploading of already-existing records in the insertion mode
(BibUpload)
*) fixed submission in UTF-8 languages (WebSubmit)
*) updated HOWTO Run Your Existing CDSware Installation (WebHelp)
*) test suite improvements (WebSearch, BibHarvest, BibRank,
BibConvert)
*) German translation updated and new German stopwords list added,
thanks to Guido Pelzer
*) new Greek and Ukrainian translations, thanks to Theodoros
Theodoropoulos and Vasyl Ostrovskyi
*) all language codes now comply with RFC 1766 and ISO 639
*) numerous other small fixes and improvements, with many
contributions by the EPFL team headed by Frederic Gobry
(BibConvert, BibUpload, WebSearch, WebSubmit, WebSession)
CDSware v0.5.0 -- released 2004-12-17
-------------------------------------
*) new rank engine, featuring word similarity rank method and the
journal impact factor rank demo (BibRank)
*) search engine includes ranking option (WebSearch)
*) record similarity search based on word frequency (WebSearch,
BibRank)
*) stopwords possibility when ranking and indexing (BibRank, BibIndex)
*) stemming possibility when ranking and indexing (BibRank, BibIndex)
*) search engine boolean query processing stages improved (WebSearch)
*) search engine accent matching in phrase searches (WebSearch)
*) regular expression searching mode introduced into the Simple
Search interface too (WebSearch)
*) Search Tips split into a brief Search Tips page and detailed
Search Guide page (WebSearch)
*) improvements to the ``Try your search on'' hints (WebSearch)
*) author search hints introduced (WebSearch)
*) search interface respects title prologue/epilogue portalboxes
(WebSearch)
*) improvements to admin interfaces (WebSearch, BibIndex, BibRank,
WebAccess)
*) basket item ordering problem fixed (WebBasket)
*) access error messages introduced (WebAccess and its clients)
*) new account management to enable/disable guest users and
automatic vs to-be-approved account registration (WebAccess)
*) possibility for temporary read-only access to, and closure of, the
site; useful for backups (WebAccess and its clients)
*) possibility for external authentication login methods (WebAccess)
*) new XML MARC handling library (BibEdit)
*) when uploading, bad XML records are marked as errors (BibUpload)
*) improvements to the submission engine and its admin interface,
thanks to Tiberiu Dondera (WebSubmit)
*) preparations for electronic mail submission feature, not yet
functional (ElmSubmit)
*) added example on MARC usage at CERN (WebHelp)
*) legacy compatibility with MySQL 3.23.x assured (BibUpload)
*) legacy compatibility with Python 2.2 assured (WebSubmit)
*) test suite additions and corrections (BibRank, BibIndex,
WebSearch, BibEdit)
*) French translation fixes, thanks to Eric Grand
*) minor Czech and Slovak translation cleanup
CDSware v0.3.3 (DEVELOPMENT) -- released 2004-07-16
---------------------------------------------------
*) new international phrases, collection and field names; thanks to
Guido, Flavio, Tullio
*) collection international names are now respected by the search
engine and interfaces (WebSearch)
*) field international names are now respected by the search
engine and interfaces (WebSearch)
*) when no hits found in a given collection, do not display all
public hits straight away but only link to them (WebSearch)
*) records marked as DELETED aren't shown anymore in XML MARC and
other formats (WebSearch)
*) detailed record page now features record creation and modification
times (WebSearch)
*) improved XML MARC parsing and cumulative record count in case of
uploading of several files in one go (BibUpload)
*) personal `your admin activities' page introduced (WebSession)
*) added option to fulltext-index local files only (BibIndex)
*) initial release of the BibIndex Admin interface (BibIndex)
*) checking of mandatory selection box definitions (WebSubmit)
*) WebSearch Admin interface cleanup (WebSearch)
*) introducing common test suite infrastructure (WebSearch, BibIndex,
MiscUtil, WebHelp)
*) fixed accent and link problems for photo demo records (MiscUtil)
*) conference title exported via OAI XML DC (BibHarvest)
*) enabled building out of source directory; thanks to Frederic
CDSware v0.3.2 (DEVELOPMENT) -- released 2004-05-12
---------------------------------------------------
*) admin area improved: all the modules have now Admin Guides; some
guides were updated, some are still to be updated (WebHelp,
BibConvert, BibFormat, BibIndex, BibSched, WebAlert, WebSession,
WebSubmit, BibEdit, BibHarvest, BibRank, BibUpload, WebAccess,
WebBasket, WebSearch, WebStyle)
*) initial release of the WebSearch Admin interface (WebSearch)
*) initial release of the BibRank Admin interface (BibRank)
*) search cache expiry after insertion of new records (WebSearch)
*) search engine now does on-the-fly formatting via BibFormat CLI
call to handle restricted site situations (WebSearch)
*) webcoll default verbosity decreased for efficiency (WebSearch)
*) added BibConvert configuration example for converting XML Dublin
Core to XML MARC (BibConvert)
*) BibConvert knowledge base mode extended by various case-sensitive
matching possibilities (BibConvert)
*) fixed various problems with fulltext file names and the submission
from MS Windows platform (WebSubmit)
*) fixed problem with bibupload append mode not updating XML MARC
properly (BibUpload)
*) fixed small problems with the submission interface such as
multiple fields selection (WebSubmit)
*) session revoking and session expiry strengthened (WebSession)
*) page design and style sheet updated to better fit large variety of
browsers (WebStyle)
*) added output format argument for basket display (WebBasket)
*) new Swedish translation and updated German, Russian, and Spanish
translations; thanks to Urban, Guido, Lyuba, and Magaly
*) faster creation of I18N static HTML and PHP files during make
CDSware v0.3.1 (DEVELOPMENT) -- released 2004-03-12
---------------------------------------------------
*) security fix preventing exposure of local configuration variables
by malicious URL crafting (WebSearch, WebSubmit, WebAlert,
WebBasket, WebSession, BibHarvest, MiscUtil)
*) initial release of the ranking engine (BibRank)
*) new guide on HOWTO Run Your CDSware Installation (WebHelp)
*) fixed submit configurations with respect to fulltext links and
metadata tags (WebSubmit, MiscUtil)
*) Your Account personal corner now shows the list and the status
of submissions and approvals (WebSession)
*) uniform help and version number option for CLI executables
(WebSearch, BibSched, BibIndex, BibRank, BibHarvest, BibConvert,
WebAccess, BibFormat, WebSession, WebAlert)
*) uniform technique for on-the-fly formatting of search results via
`hb_' and `hd_' output format parameters (WebSearch)
*) check for presence of pcntl and mysql PHP libraries (BibUpload)
CDSware v0.3.0 (DEVELOPMENT) -- released 2004-03-05
---------------------------------------------------
*) new development branch release (important SQL table changes)
*) introducing a new submission engine and the end-user web
interface (WebSubmit)
*) bibupload is now a BibSched task with new options (BibUpload)
*) BibWords renamed into BibIndex in the view of future phrase
indexing changes (BibIndex)
*) more secure DB server connectivity (BibSched)
*) record matching functionality (BibConvert)
*) character encoding conversion tables (BibConvert)
*) Qualified Dublin Core conversion example (BibConvert)
*) OAI deleted records policy can now be specified (BibHarvest)
*) multi-language collection portalboxes (WebSearch)
*) HTML pages now respect language selections (WebSearch, WebHelp)
*) minor layout changes (WebStyle)
*) updated Russian and other translations
*) ChangeLog is now generated from CVS log messages
*) plus the usual set of bugfixes (see ChangeLog)
CDSware v0.1.2 (DEVELOPMENT) -- released 2003-12-21
---------------------------------------------------
*) development branch release
*) fix BibReformat task launching problem (BibFormat)
*) fix BibTeX -> XML MARC conversion example (BibConvert)
*) updated Spanish translation
CDSware v0.1.1 (DEVELOPMENT) -- released 2003-12-19
---------------------------------------------------
*) development branch release
*) access control engine now used by BibWords, BibFormat (admin and
bibreformat), WebSearch (webcoll), and BibTaskEx
*) access control engine admin guide started (WebAccess)
*) search engine support for sorting by more than one field (WebSearch)
*) more internationalization of the search engine messages (WebSearch)
*) new language: Norwegian (bokmål)
*) simple example for converting BibTeX into XML MARC (BibConvert)
*) new optional --with-python configuration option
*) Python module detection during configure
*) bugfixes: os.tempnam() warning, login page referer, and others
CDSware v0.1.0 (DEVELOPMENT) -- released 2003-12-04
---------------------------------------------------
*) development branch release
*) search engine redesign to yield five times more search performance
for larger sites (WebSearch, BibWords)
*) fulltext indexation of PDF, PostScript, MS Word, MS PowerPoint and
MS Excel files (WebSearch)
*) integrated combined metadata/fulltext/citation search (WebSearch)
*) multi-stage search guidance in cases of no exact match (WebSearch)
*) OAI-PMH harvester (BibHarvest)
*) bibliographic task scheduler (BibSched)
*) automatic daemon mode of the indexer, the formatter and the
collection cache generator (BibWords, BibFormat, WebSearch)
*) user management and session handling rewrite (WebSession)
*) user personalization, document baskets and notification alert
system (WebBasket, WebAlert)
*) role-based access control engine (WebAccess)
*) internationalization of the interface started (currently with
Czech, German, English, Spanish, French, Italian, Portuguese,
Russian, and Slovak support)
*) web page design update (WebStyle)
*) introduction of programmer-oriented technical documentation corner
(WebHelp)
*) source tree reorganization, mod_python technology adopted for most
of the modules
CDSware v0.0.9 (STABLE) -- released 2002-08-01
----------------------------------------------
*) first "public" alpha release of CDSware
*) recently standardized Library of Congress' MARC XML schema adopted
in all CDSware modules as the new default internal XML file format
(BibConvert, BibFormat, BibUpload, WebSubmit, WebSearch)
*) support for OAI-PMH v2.0 in addition to OAI-PMH v1.1 (WebSearch)
*) search interface now honors multiple output formats per collection
(BibFormat, WebSearch)
*) search interface now honors search fields, search options, and
sort options from the database config tables (WebSearch,
WebSearch Admin)
*) search interface now honors words indexes from the database config
tables (BibWords, WebSearch)
*) easy reformatting of already uploaded bibliographic records via
web admin. tool (BibFormat Admin/Reformat Records)
*) new submission form field type ("response") allowing
greater flexibility (WebSubmit) [thanks to Frank Sudholt]
*) demo site "Atlantis Institute of Science" updated to demonstrate:
Pictures collection of photographs; specific per-collection
formats; references inside Articles and Preprints; "cited by"
search link; published version linking; subject category
searching; search within, search options, sort options in the web
collection pages.
- end of file -
diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 518c8f7ad..117b3569e 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -1,126 +1,165 @@
--------------------------------------------------------------------
-Invenio v1.1.1 is released
-December 21, 2012
+Invenio v1.1.2 is released
+August 19, 2013
http://invenio-software.org/
--------------------------------------------------------------------
-Invenio v1.1.1 was released on December 21, 2012.
+Invenio v1.1.2 was released on August 19, 2013.
This stable release contains a number of minor fixes and improvements.
-It is recommended to all Invenio sites using v1.1.0 or previous stable
+It is recommended to all Invenio sites using v1.1.1 or previous stable
release series (v0.99, v1.0).
What's new:
-----------
- *) BatchUploader: error reporting improvements
+ *) BibAuthorID: fix in name comparisons (#1313 #1314); improvements
+ and fixes; improvements, fixes and optimizations; UI and backend
+ improvements
- *) BibAuthorID: arXiv login upgrade; fix for small bug in claim
- interface
+ *) BibCatalog: removal of print statement (#1337)
- *) BibConvert: fix bug with SPLITW function; target/source CLI flag
- description fix
+ *) BibClassify: escape keywords in tag cloud and MARCXML
- *) BibDocFile: better error report for unknown format; explicit
- redirection to secure URL; fix for file upload in submissions
+ *) BibDocFile: better JS washing in web UI; display file upload
+ progress (#1020 #1021); display "Restricted" label correctly
+ (#1299); fix check-md5 with bibdocfsinfo cache (#1249); fix
+ check-md5 with bibdocfsinfo cache (#1249); fix error in calling
+ register_download (#1311); handling of exceptions in Md5Folder
+ (#1060); revert md5 property patch (#1249); support new magic
+ library (#1207)
- *) BibEdit: 'bibedit' CSS class addition to page body
+ *) BibEncode: minor fix in process_batch_job()
- *) BibFormat: clean Default_HTML_meta template; fix for js_quicktags
- location; ISBN tag update for meta format; "ln" parameter in
- bfe_record_url output; meta header output fix; relator code filter
- in bfe_authors; fix for reformatting by record IDs
+ *) BibFormat: additional fulltext file display in HB (#1219); checks
+ for bibformat bin; fix CLI call to old PHP-based formatter; fixes
+ unit tests (#1320); fix for fulltext file format; fix snippets for
+ phrase queries (#1201); format_element initialisation fix; passing
+ of user_info for Excel format; replacement of CDS Invenio by
+ Invenio; setUp/tearDown in unit tests (#1319); skip hidden icons
+ in OpenGraph image tag
- *) errorlib: register_exception improvements
+ *) BibIndex: better wording for stemming in admin UI; replacement of
+ CDS Invenio by Invenio; synonym indexing speed up (#1484); use
+ human friendly index name (#1329)
- *) global: login link using absolute URL redirection
+ *) BibKnowledge: /kb/export 500 error fix; optional memoisation of
+ KBR lookups (#1484)
- *) installation: aidUSERINPUTLOG consistency upgrade; bigger
- hstRECORD.marcxml size; fix for wrong name in tabcreate; inclusion
- of JS quicktags in tarball; mark upgrade recipes as applied;
- rephrase 1.1 upgrade recipe warning; safer upgrader bibsched
- status parse; strip spaces in CFG list values
+ *) BibMerge: delete cache file on submit
- *) jQuery: tablesorter location standardisation
+ *) BibSched: bibupload max_priority check; bugfix for high-priority
+ monotasks; increases size of monitor columns;
+ parse_runtime_limit() fix (#1432); parse_runtime_limit() tests fix
+ (#1432)
- *) mailutils: authentication and TLS support
+ *) BibUpload: FMT regression test case fix (#1152); indicators in
+ strong tags (#939)
- *) OAIRepository: Edit OAI Set page bug fix; fix for OAI set editing;
- print_record() fixes
+ *) CKEditor: updated to version 3.6.6
- *) plotextractor: washing of captions and context
+ *) dateutils: strftime improvement (#1065); strptime for Python-2.4
+ compatibility
- *) pluginutils: fix for failing bibformat test case
+ *) errorlib: hiding bibcatalog info in exception body
- *) solrutils: addition of files into release tarball
+ *) global: test suite nosification
- *) WebAccess: admin interface usability improvement; guest unit tests
- for firerole
+ *) htmlutils: fix single quote escaping; improve js string escaping;
+ MathJax 2.1 (#1050)
- *) WebAlert: new regression tests for alerts
+ *) I18N: updates to Catalan and Spanish translations
- *) WebComment: cleaner handling of non-reply comments
+ *) installation: fix collectiondetailedrecordpagetabs (#1496); fix
+ for jQuery hotkeys add-on URL (#1507); fix for MathJax OS X
+ install issue (#1455); support for Apache-2.4 (#1552)
- *) WebJournal: better language handling in widgets; CERN-specific
- translation; explicit RSS icon dimensions; fix for
- CFG_TMPSHAREDDIR; fix for retrieval of deleted articles; search
- select form by name
+ *) inveniocfg: tests runner file closure fix (#1327)
- *) WebSearch: fix for webcoll grid layout markup;
- get_all_field_values() typo; next-hit/previous-hit numbering fix;
- respect output format content-type; washing of 'as' argument
+ *) InvenioConnector: fix for CDS authentication; mechanize dependency
- *) WebSession: fix for login-with-referer issue; fix for
- merge_usera_into_userb()
+ *) inveniogc: consider journal cache subdirs
- *) WebStyle: dumb page loading fix Google Analytics documentation
- update; memory leak fix in session handling; new /ping handler;
- removal of excess language box call; req.is_https() fix;
+ *) memoiseutils: initial release
- *) WebSubmit: display login link on /submit page; fix for
- Send_APP_Mail function; fix the approval URL for publiline
+ *) OAIHarvest: fix path for temporary authorlists; holding-pen UI
+ bugfixes (#1401)
- *) WebUser: fix for referer URL protocol
+ *) OAIRepository: CFG_OAI_REPOSITORY_MARCXML_SIZE; no bibupload -n
+
+ *) RefExtract: replacement of CDS Invenio by Invenio
+
+ *) WebAccess: fix variable parsing in robot auth (#1456); IP-based
+ rules and offline user fix (#1233); replacement of CDS Invenio by
+ Invenio
+
+ *) WebApiKey: renames unit tests to regression tests (#1324)
+
+ *) WebAuthorProfile: fix XSS vulnerability
+
+ *) WebComment: escape review "title"; escape review "title"
+
+ *) WebSearch: 410 HTTP code for deleted records; advanced search
+ notification if no hits; better cleaning of word patterns; fix
+ infinite synonym lookup cases (#804); handles "find feb 12"
+ (#948); nicer browsing of fuzzy indexes (#1348); respect default
+ `rg` in Advanced Search; SPIRES date math search fixes (#431
+ #948); SPIRES invalid date search fix (#1467); tweaks SPIRES
+ two-digit search; unit test disabling for CFG_CERN_SITE; unit test
+ update (#1326)
+
+ *) WebSession: fix for list of admin activities (#1444); login_method
+ changes; unit vs regression test suite cleanup
+
+ *) WebStat: use CFG_JOURNAL_TAG instead of 773/909C4 (#546)
+
+ *) WebSubmit: new websubmitadmin CLI (#1334); replacement of CDS
+ Invenio by Invenio; use PyPDF2 if available
Download:
---------
-
-
-
+
+
+
Installation notes:
-------------------
Please follow the INSTALL file bundled in the distribution tarball.
Upgrade notes:
--------------
Please proceed as follows:
a) Stop your bibsched queue and your Apache server.
b) Install the update:
- $ tar xvfz invenio-1.1.1.tar.gz
- $ cd invenio-1.1.1
+ $ tar xvfz invenio-1.1.2.tar.gz
+ $ cd invenio-1.1.2
$ sudo rsync -a /opt/invenio/etc/ /opt/invenio/etc.OLD/
$ sh /opt/invenio/etc/build/config.nice
$ make
$ make check-upgrade
$ sudo -u www-data make install
$ sudo rsync -a /opt/invenio/etc.OLD/ \
- --exclude invenio.conf \
- --exclude bibformat/format_templates/Default_HTML_meta.bft \
+ --exclude invenio-autotools.conf \
+ --exclude bibformat/format_templates/Default_HTML_brief.bft \
/opt/invenio/etc/
# Note: if you are upgrading from previous stable release
# series (v0.99 or v1.0), please don't rsync but diff, in order
# to inspect changes and adapt your old configuration to the
# new v1.1 release series.
$ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all
$ sudo -u www-data /opt/invenio/bin/inveniocfg --upgrade
- c) Restart your Apache server and your bibsched queue.
+ c) Update your MathJax and CKeditor plugins:
+
+ $ sudo -u www-data make install-mathjax-plugin
+ $ sudo -u www-data make install-ckeditor-plugin
+
+ d) Restart your Apache server and your bibsched queue.
- end of file -
\ No newline at end of file
diff --git a/modules/webjournal/lib/webjournal_regression_tests.py b/modules/webjournal/lib/webjournal_regression_tests.py
index 0455207fb..a3ec223bc 100644
--- a/modules/webjournal/lib/webjournal_regression_tests.py
+++ b/modules/webjournal/lib/webjournal_regression_tests.py
@@ -1,416 +1,416 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""WebJournal Regression Test Suite."""
__revision__ = "$Id$"
import datetime
import unittest
import urllib
from invenio.search_engine import record_public_p
from invenio import webjournal_utils as wju
from invenio.config import CFG_SITE_URL, \
CFG_SITE_LANG, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_PREFIX, \
CFG_DEVEL_SITE
from invenio.testutils import make_test_suite, run_test_suite, \
test_web_page_content, merge_error_messages
class ArticlesRelated(unittest.TestCase):
    """Functions about articles"""

    def test_is_new_article(self):
        """webjournal - checks if an article is new or not """
        # An article is "new" when it never appeared in an issue older
        # than the given one.  Record 99 is expected to appear in an
        # older issue, record 103 only in 03/2009 -- assumes the
        # Atlantis demo records are loaded; TODO confirm on a fresh
        # demo site.
        article = wju.is_new_article('AtlantisTimes', '03/2009', 99)
        self.assertEqual(article, False)
        article = wju.is_new_article('AtlantisTimes', '03/2009', 103)
        self.assertEqual(article, True)
class CategoriesRelated(unittest.TestCase):
    """Functions about journal categories"""

    def test_get_journal_categories(self):
        """webjournal - returns all categories for a given issue"""
        # With an explicit issue only categories that contain articles
        # for that issue are returned; without an issue, all categories
        # defined in the journal config are returned.
        journal1 = wju.get_journal_categories('AtlantisTimes', '03/2009')
        self.assertEqual(journal1[0], 'News')
        self.assertEqual(journal1[1], 'Science')
        journal2 = wju.get_journal_categories('AtlantisTimes', )
        self.assertEqual(journal2[0], 'News')
        self.assertEqual(journal2[1], 'Science')
        self.assertEqual(journal2[2], 'Arts')

    def test_get_category_query(self):
        """webjournal - returns the category definition """
        # The category -> search pattern mapping comes from the
        # journal's XML configuration file ('record/rule' nodes).
        self.assertEqual(wju.get_category_query('AtlantisTimes', 'News'),
                         '980__a:ATLANTISTIMESNEWS or 980__a:ATLANTISTIMESNEWSDRAFT')
        self.assertEqual(wju.get_category_query('AtlantisTimes', 'Science'),
                         '980__a:ATLANTISTIMESSCIENCE or 980__a:ATLANTISTIMESSCIENCEDRAFT')
class JournalConfigVars(unittest.TestCase):
    """Functions to get journal variables """
    # All expected values below match the 'AtlantisTimes' demo journal
    # configuration shipped with the Atlantis demo site; they will fail
    # on an instance with a different journal setup.

    def test_get_xml_from_config(self):
        """webjournal - returns values from the journal configuration file """
        value = wju.get_xml_from_config(["submission/doctype"], 'AtlantisTimes')
        self.assertEqual(value.values()[0], ['DEMOJRN'])
        self.assertEqual(value.keys(), ['submission/doctype'])
        value = wju.get_xml_from_config(["submission/identifier_element"], 'AtlantisTimes')
        self.assertEqual(value.values()[0], ['DEMOJRN_RN'])
        self.assertEqual(value.keys(), ['submission/identifier_element'])
        value = wju.get_xml_from_config(["draft_image_access_policy"], 'AtlantisTimes')
        self.assertEqual(value.values()[0], ['allow'])

    def test_get_journal_issue_field(self):
        """webjournal - returns the MARC field holding the issue number"""
        value = wju.get_journal_issue_field('AtlantisTimes')
        self.assertEqual(value, '773__n')

    def test_get_journal_css_url(self):
        """webjournal - returns URL to this journal's CSS """
        self.assertEqual(wju.get_journal_css_url('AtlantisTimes', type='screen'), CFG_SITE_URL + '/img/AtlantisTimes.css')

    def test_get_journal_submission_params(self):
        """webjournal - returns params for the submission of articles """
        submissions = wju.get_journal_submission_params('AtlantisTimes')
        self.assertEqual(submissions[0], 'DEMOJRN')
        self.assertEqual(submissions[1], 'DEMOJRN_RN')
        self.assertEqual(submissions[2], '037__a')

    def test_get_journal_draft_keyword_to_remove(self):
        """webjournal - returns the keyword to be removed in order to move the article from Draft to Ready """
        self.assertEqual(wju.get_journal_draft_keyword_to_remove('AtlantisTimes'), 'DRAFT')

    def test_get_journal_alert_sender_email(self):
        """webjournal - returns the email address used to send the alert email. """
        self.assertEqual(wju.get_journal_alert_sender_email('AtlantisTimes'), CFG_SITE_SUPPORT_EMAIL)

    def test_get_journal_alert_recipient_email(self):
        """webjournal - returns the default email address of the recipients of the email"""
        # On a development site the recipient is empty, presumably to
        # avoid sending real alerts from test setups -- TODO confirm.
        if CFG_DEVEL_SITE:
            self.assertEqual(wju.get_journal_alert_recipient_email('AtlantisTimes'), '')
        else:
            self.assertEqual(wju.get_journal_alert_recipient_email('AtlantisTimes'), 'recipients@atlantis.atl')

    def test_get_journal_template(self):
        """webjournal - returns the journal templates name for the given template type"""
        value = wju.get_journal_template('index', 'AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(value, 'webjournal/AtlantisTimes_Index.bft')

    def test_get_journal_name_intl(self):
        """webjournal - returns the nice name of the journal """
        name = wju.get_journal_name_intl('AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(name, 'Atlantis Times')

    def test_get_journal_languages(self):
        """webjournal - returns the list of languages defined for this journal"""
        lang = wju.get_journal_languages('AtlantisTimes')
        self.assertEqual(lang[0], 'en')
        self.assertEqual(lang[1], 'fr')

    def test_get_journal_issue_grouping(self):
        """webjournal - returns the number of issue that are typically released
        at the same time"""
        issue = wju.get_journal_issue_grouping('AtlantisTimes')
        self.assertEqual(issue, 2)

    def test_get_journal_nb_issues_per_year(self):
        """webjournal - returns the default number of issues per year for this
        journal"""
        nb = wju.get_journal_nb_issues_per_year('AtlantisTimes')
        self.assertEqual(nb, 52)

    def test_get_journal_preferred_language(self):
        """webjournal - returns the most adequate language to display the
        journal, given a language """
        # Falls back to 'en' when the requested language is not among
        # the journal's configured languages.
        value = wju.get_journal_preferred_language('AtlantisTimes', 'fr')
        self.assertEqual(value, 'fr')
        value = wju.get_journal_preferred_language('AtlantisTimes', 'it')
        self.assertEqual(value, 'en')
        value = wju.get_journal_preferred_language('AtlantisTimes', 'hello')
        self.assertEqual(value, 'en')

    def test_get_unreleased_issue_hiding_mode(self):
        """webjournal - returns how unreleased issue should be treated"""
        value = wju.get_unreleased_issue_hiding_mode('AtlantisTimes')
        self.assertEqual(value, 'all')

    def test_get_first_issue_from_config(self):
        """webjournal - returns the first issue as defined from config"""
        issue = wju.get_first_issue_from_config('AtlantisTimes')
        self.assertEqual(issue, '02/2009')
class TimeIssueFunctions(unittest.TestCase):
    """Functions about time, using issues"""

    def test_get_current_issue(self):
        """webjournal - returns the current issue of a journal """
        issue = wju.get_current_issue('en', 'AtlantisTimes')
        self.assertEqual(issue, '03/2009')

    def test_get_all_released_issues(self):
        """webjournal - returns the list of released issue"""
        # Most recent issue comes first
        issues = wju.get_all_released_issues('AtlantisTimes')
        self.assertEqual(issues[0], '03/2009')
        self.assertEqual(issues[1], '02/2009')

    def test_get_next_journal_issues(self):
        """webjournal - this function suggests the 'n' next issue numbers """
        issues = wju.get_next_journal_issues('03/2009', 'AtlantisTimes', n=2)
        self.assertEqual(issues[0], '04/2009')
        self.assertEqual(issues[1], '05/2009')

    def test_get_grouped_issues(self):
        """webjournal - returns all the issues grouped with a given one"""
        issues = wju.get_grouped_issues('AtlantisTimes', '03/2009')
        self.assertEqual(issues[0], '02/2009')
        self.assertEqual(issues[1], '03/2009')

    def test_get_issue_number_display(self):
        """webjournal - returns the display string for a given issue number"""
        # Grouped issues 02 and 03 are displayed as a single range
        issue_nb = wju.get_issue_number_display('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(issue_nb, '02-03/2009')

    def test_make_issue_number(self):
        """webjournal - creates a normalized issue number representation"""
        # NOTE(review): 03 and 06 are Python 2 octal-style literals
        # (digits < 8, so they equal 3 and 6); Python 3 rejects this
        # spelling.
        issue = wju.make_issue_number('AtlantisTimes', 03, 2009, for_url_p=False)
        self.assertEqual(issue, '03/2009')
        issue = wju.make_issue_number('AtlantisTimes', 06, 2009, for_url_p=False)
        self.assertEqual(issue, '06/2009')
        issue = wju.make_issue_number('AtlantisTimes', 03, 2008, for_url_p=False)
        self.assertEqual(issue, '03/2008')

    def test_get_release_datetime(self):
        """webjournal - gets the date at which an issue was released from the DB"""
        value = wju.get_release_datetime('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(value, datetime.datetime(2009, 1, 16, 0, 0))

    def test_get_announcement_datetime(self):
        """webjournal - get the date at which an issue was announced through
        the alert system"""
        # The demo issue was never announced, hence None is expected
        value = wju.get_announcement_datetime('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(value, None)

    def test_datetime_to_issue(self):
        """webjournal - returns the issue corresponding to the given datetime object"""
        date_value = datetime.datetime(2009, 7, 16, 13, 39, 46, 426373)
        value = wju.datetime_to_issue(date_value, 'AtlantisTimes')
        self.assertEqual(value, None)

    def test_issue_to_datetime(self):
        """webjournal - returns the *theoretical* date of release for given issue"""
        issue = wju.issue_to_datetime('03/2009', 'AtlantisTimes', granularity=None)
        self.assertEqual(issue, datetime.datetime(2009, 1, 19, 0, 0))

    def test_get_number_of_articles_for_issue(self):
        """webjournal - returns a dictionary with all categories and number of
        articles in each category"""
        value = wju.get_number_of_articles_for_issue('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(value.values()[0], 3)
        self.assertEqual(value.values()[1], 2)
        self.assertEqual(value.keys()[0], 'News')
        self.assertEqual(value.keys()[1], 'Science')

    def test_is_recid_in_released_issue(self):
        """webjournal - check identification of records as part of a released issue"""
        for recid in xrange(1, 99):
            # Not articles
            self.assertEqual(wju.is_recid_in_released_issue(recid), False)
        for recid in xrange(99, 104):
            # Article published and well categorized/indexed
            self.assertEqual(wju.is_recid_in_released_issue(recid), True)
        # Even though article is not in public collection (yet?), it
        # is part of a released issue
        self.assertEqual(wju.is_recid_in_released_issue(111), True)
        # Article is not part of public collection, and is not part of
        # a released issue
        self.assertEqual(wju.is_recid_in_released_issue(112), False)

    def test_article_in_unreleased_issue(self):
        """webjournal - check access to unreleased article"""
        # Record is not public
        self.assertEqual(record_public_p(112), False)
        # Unreleased article is not visible to guest
        error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/06/News/112' ,
                                               expected_text=["A naturalist's voyage around the world"],
                                               unexpected_text=['Galapagos Archipelago'])
        if error_messages:
            self.fail(merge_error_messages(error_messages))
        # Unreleased article is visible to editor
        error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/06/News/112',
                                               username='balthasar',
                                               password='b123althasar',
                                               expected_text=['Galapagos Archipelago'],
                                               unexpected_text=['This file is restricted',
                                                                'You are not authorized'])
        if error_messages:
            self.fail(merge_error_messages(error_messages))

    def test_restricted_article_in_released_issue(self):
        """webjournal - check access to restricted article in released issue"""
        # Record is not public
        self.assertEqual(record_public_p(112), False)
        # Released article (even if restricted) is visible to guest
        error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/Science/111' ,
                                               expected_text=["Scissor-beak"],
                                               unexpected_text=["A naturalist's voyage around the world"])
        if error_messages:
            self.fail(merge_error_messages(error_messages))
class JournalRelated(unittest.TestCase):
    """Functions about journal"""

    def test_get_journal_info_path(self):
        """webjournal - returns the path to the info file of the given journal"""
        info = wju.get_journal_info_path('AtlantisTimes')
        path = CFG_PREFIX + '/var/cache/webjournal/AtlantisTimes/info.dat'
        self.assertEqual(info, path)

    def test_get_journal_article_cache_path(self):
        """webjournal - returns the path to cache file of the articles of a given issue"""
        info = wju.get_journal_article_cache_path('AtlantisTimes', '03/2009')
        # NOTE(review): the two lines below are raw unified-diff markers
        # left in this source chunk (the expected cache path moved to a
        # per-issue directory layout); resolve against the applied
        # patch before running.
-        path = CFG_PREFIX + '/var/cache/webjournal/AtlantisTimes/03_2009_articles_cache.dat'
+        path = CFG_PREFIX + '/var/cache/webjournal/AtlantisTimes/2009/03/articles_cache.dat'
        self.assertEqual(info, path)

    def test_get_journal_id(self):
        """webjournal - get the id for this journal from the DB"""
        jrnid = wju.get_journal_id('AtlantisTimes', ln=CFG_SITE_LANG)
        self.assertEqual(jrnid, 1)

    def test_guess_journal_name(self):
        """webjournal - tries to take a guess what a user was looking for on
        the server if not providing a name for the journal"""
        name = wju.guess_journal_name('en', journal_name=None)
        self.assertEqual(name, 'AtlantisTimes' )

    def test_get_journals_ids_and_names(self):
        """webjournal - returns the list of existing journals IDs and names"""
        ids_names = wju.get_journals_ids_and_names()
        self.assertEqual(ids_names[0].values(), [1, 'AtlantisTimes'])
        self.assertEqual(ids_names[0].keys(), ['journal_id', 'journal_name'])

    def test_parse_url_string(self):
        """webjournal - parses any url string given in webjournal"""
        # Index page URL (category only)
        d = wju.parse_url_string("/journal/AtlantisTimes/2009/03/News/?ln=en")
        self.assertEqual(d['category'], 'News')
        self.assertEqual(d['issue_year'], 2009)
        self.assertEqual(d['ln'], 'en')
        self.assertEqual(d['issue_number'], 3)
        self.assertEqual(d['journal_name'], 'AtlantisTimes')
        self.assertEqual(d['issue'], '03/2009')
        # Same, without trailing slash
        d = wju.parse_url_string("/journal/AtlantisTimes/2009/03/Science?ln=en")
        self.assertEqual(d['category'], 'Science')
        self.assertEqual(d['issue_year'], 2009)
        self.assertEqual(d['ln'], 'en')
        self.assertEqual(d['issue_number'], 3)
        self.assertEqual(d['journal_name'], 'AtlantisTimes')
        self.assertEqual(d['issue'], '03/2009')
        # Article URL (with recid)
        d = wju.parse_url_string("/journal/AtlantisTimes/2009/03/News/97?ln=en")
        self.assertEqual(d['category'], 'News')
        self.assertEqual(d['issue_year'], 2009)
        self.assertEqual(d['ln'], 'en')
        self.assertEqual(d['issue_number'], 3)
        self.assertEqual(d['recid'], 97)
        self.assertEqual(d['journal_name'], 'AtlantisTimes')
        self.assertEqual(d['issue'], '03/2009')
        # Unknown journal name is expected to raise
        try:
            wju.parse_url_string("/journal/fictivejournal/2009/03/News/97?ln=en")
            dont_find_journal = 'not'
        except:
            dont_find_journal = 'ok'
        self.assertEqual(dont_find_journal, 'ok')
class HtmlCachingFunction(unittest.TestCase):
    """HTML caching functions"""

    def setUp(self):
        "Access some URL for cache to be generated"
        # Hitting the pages as a reader makes WebJournal write its HTML
        # cache, which the tests below then read back directly.
        urllib.urlopen(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News')
        urllib.urlopen(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News/103')

    def test_get_index_page_from_cache(self):
        """webjournal - function to get an index page from the cache"""
        value = wju.get_index_page_from_cache('AtlantisTimes', 'News', '03/2009', 'en')
        assert("Atlantis (Timaeus)" in value)

    def test_get_article_page_from_cache(self):
        """webjournal - gets an article view of a journal from cache"""
        value = wju.get_article_page_from_cache('AtlantisTimes', 'News', 103, '03/2009', 'en')
        assert("April 14th, 1832.—Leaving Socêgo, we rode to another estate on the Rio Macâe" in value)

    def test_clear_cache_for_issue(self):
        """webjournal - clears the cache of a whole issue"""
        value = wju.clear_cache_for_issue('AtlantisTimes', '03/2009')
        self.assertEqual(value, True)
class FormattingElements(unittest.TestCase):
    """Test how formatting elements behave in various contexts"""

    def test_language_handling_in_journal(self):
        """webjournal - check washing of ln parameter in /journal handler"""
        # 'hello' is not a valid language code: the page must still
        # render (verbose=9) without format element errors.
        error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News/103?verbose=9&ln=hello' ,
                                               expected_text=["we rode to another estate",
                                                              "The forest abounded with beautiful objects"],
                                               unexpected_text=["Error when evaluating format element WEBJOURNAL_"])
        if error_messages:
            self.fail(merge_error_messages(error_messages))

    def test_language_handling_in_record(self):
        """webjournal - check washing of ln parameter in /record handler"""
        # Same invalid-language check, through the /record handler
        error_messages = test_web_page_content(CFG_SITE_URL + '/record/103?verbose=9&ln=hello' ,
                                               expected_text=["we rode to another estate",
                                                              "The forest abounded with beautiful objects"],
                                               unexpected_text=["Error when evaluating format element WEBJOURNAL_"])
        if error_messages:
            self.fail(merge_error_messages(error_messages))

    def test_language_handling_in_whatsnew_widget(self):
        """webjournal - check handling of ln parameter in "what's new" widget"""
        # The widget link must carry the requested language (fr) along
        error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News?ln=fr' ,
                                               expected_link_label="Scissor-beak",
                                               expected_link_target=CFG_SITE_URL + "/journal/AtlantisTimes/2009/03/Science/111?ln=fr")
        if error_messages:
            self.fail(merge_error_messages(error_messages))
# Aggregate all test classes into the suite picked up by the test runner.
TEST_SUITE = make_test_suite(ArticlesRelated,
                             CategoriesRelated,
                             JournalConfigVars,
                             TimeIssueFunctions,
                             JournalRelated,
                             HtmlCachingFunction,
                             FormattingElements)

if __name__ == "__main__":
    # warn_user=True: these regression tests hit a live site
    # (CFG_SITE_URL) and may alter its caches.
    run_test_suite(TEST_SUITE, warn_user=True)
diff --git a/modules/webjournal/lib/webjournal_utils.py b/modules/webjournal/lib/webjournal_utils.py
index 1405597a7..9f969c3a7 100644
--- a/modules/webjournal/lib/webjournal_utils.py
+++ b/modules/webjournal/lib/webjournal_utils.py
@@ -1,1783 +1,1809 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Various utilities for WebJournal, e.g. config parser, etc.
"""
import time
import datetime
import calendar
import re
import os
import cPickle
import math
import urllib
from MySQLdb import OperationalError
from xml.dom import minidom
from urlparse import urlparse
from invenio.config import \
CFG_ETCDIR, \
CFG_SITE_URL, \
CFG_CACHEDIR, \
CFG_SITE_LANG, \
CFG_ACCESS_CONTROL_LEVEL_SITE, \
CFG_SITE_SUPPORT_EMAIL, \
CFG_DEVEL_SITE, \
CFG_CERN_SITE
from invenio.dbquery import run_sql
from invenio.bibformat_engine import BibFormatObject
from invenio.search_engine import search_pattern, record_exists
from invenio.messages import gettext_set_language
from invenio.errorlib import register_exception
from invenio.urlutils import make_invenio_opener
WEBJOURNAL_OPENER = make_invenio_opener('WebJournal')
########################### REGULAR EXPRESSIONS ######################

# NOTE(review): the pattern literals below look corrupted -- the HTML
# tag text they are meant to match (e.g. paragraph/img/center tags)
# appears to have been stripped out during extraction, leaving
# unbalanced constructs such as '(?P' with no group name.  Compare
# against the upstream webjournal_utils.py and restore the original
# bodies before relying on them; TODO confirm.

# Article header paragraph matcher (named group expected here).
header_pattern = re.compile('\s*(?P\s*
')
# Second header variant.
header_pattern2 = re.compile('(?P
')
# Whole-paragraph matcher (non-greedy, across newlines).
para_pattern = re.compile('(?P.+?)
', re.DOTALL)
# Extracts the 'src' location of an image tag.
img_pattern = re.compile('\S+?)("|\'|\s).*?/>', re.DOTALL)
# Extracts an image, its optional link target and optional caption.
image_pattern = re.compile(r'''
(\S*)["']?>)?# get the link location for the image
\s*# after each tag we can have arbitrary whitespaces
# the image is always centered
\s*
\S*)\s*border=1\s*(/)?># getting the image itself
\s*
\s*
()?
(
|
|
)*# the caption can be separated by any nr of line breaks
(
\s*
\s*
(?P.*?)# getting the caption
\s*
\s*
)?''', re.DOTALL | re.VERBOSE | re.IGNORECASE )
#'

############################## FEATURED RECORDS ######################
def get_featured_records(journal_name):
    """
    Returns the 'featured' records i.e. records chosen to be displayed
    with an image on the main page, in the widgets section, for the
    given journal.

    parameter:
        journal_name     -  (str) the name of the journal for which we want
                            to get the featured records
    returns:
        list of tuples (recid, img_url).  Note that img_url keeps the
        trailing newline of the line it was read from, as written by
        add_featured_record().
    """
    try:
        feature_file = open('%s/webjournal/%s/featured_record' % \
                            (CFG_ETCDIR, journal_name))
    except:
        # A missing file simply means no record was ever featured
        return []
    try:
        records = feature_file.readlines()
    finally:
        # Always release the file handle (it used to be leaked)
        feature_file.close()
    return [tuple(record.split('---', 1)) \
            for record in records if "---" in record]
def add_featured_record(journal_name, recid, img_url):
    """
    Adds the given record to the list of featured records of the given
    journal.

    parameters:
        journal_name  -  (str) the name of the journal to which the record
                         should be added.
        recid         -  (int) the record id of the record to be featured.
        img_url       -  (str) a url to an image icon displayed along the
                         featured record.
    returns:
        0 if everything went ok
        1 if record is already in the list
        2 if other problems
    """
    # Check that record is not already there
    featured_records = get_featured_records(journal_name)
    for featured_recid, featured_img in featured_records:
        if featured_recid == str(recid):
            return 1
    try:
        fptr = open('%s/webjournal/%s/featured_record'
                    % (CFG_ETCDIR, journal_name), "a")
        try:
            fptr.write(str(recid) + '---' + img_url + '\n')
        finally:
            # Close the handle even if write() fails (it used to be
            # leaked on that path)
            fptr.close()
    except:
        return 2
    return 0
def remove_featured_record(journal_name, recid):
    """
    Removes the given record from the list of featured records of the
    given journal.

    parameters:
        journal_name  -  (str) the name of the journal from which the
                         record should be removed.
        recid         -  (int) the record id of the record to remove.
    returns:
        0 if everything went ok, 1 otherwise
    """
    featured_records = get_featured_records(journal_name)
    try:
        # Rewrite the whole file, keeping every featured record but
        # the one to remove
        fptr = open('%s/webjournal/%s/featured_record'
                    % (CFG_ETCDIR, journal_name), "w")
        try:
            for featured_recid, featured_img in featured_records:
                if str(featured_recid) != str(recid):
                    fptr.write(str(featured_recid) + '---' + featured_img + \
                               '\n')
        finally:
            # Close the handle even if a write fails (it used to be
            # leaked on that path)
            fptr.close()
    except:
        return 1
    return 0
############################ ARTICLES RELATED ########################
def get_order_dict_from_recid_list(recids, journal_name, issue_number,
                                   newest_first=False,
                                   newest_only=False):
    """
    Returns the ordered list of input recids, for given
    'issue_number'.

    Since there might be several articles at the same position, the
    returned structure is a dictionary with keys being order number
    indicated in record metadata, and values being list of recids for
    this order number (recids for one position are ordered from
    highest to lowest recid).

    Eg: {'1': [2390, 2386, 2385],
         '3': [2388],
         '2': [2389],
         '4': [2387]}

    Parameters:
           recids - a list of all recid's that should be brought
                    into order
     journal_name - the name of the journal
     issue_number - *str* the issue_number for which we are
                    deriving the order
     newest_first - *bool* if True, new articles should be placed
                    at beginning of the list. If so, their
                    position/order will be negative integers
      newest_only - *bool* if only new articles should be returned

    Returns:
        ordered_records: a dictionary with the recids ordered by
                         keys
    """
    ordered_records = {}
    ordered_new_records = {}
    records_without_defined_order = []
    new_records_without_defined_order = []
    for record in recids:
        temp_rec = BibFormatObject(record)
        articles_info = temp_rec.fields('773__')
        for article_info in articles_info:
            # Accept both '9/2009' and '09/2009' spellings of the issue
            if article_info.get('n', '') == issue_number or \
                   '0' + article_info.get('n', '') == issue_number:
                if article_info.has_key('c') and \
                       article_info['c'].isdigit():
                    # 773__c carries the article position in the issue
                    order_number = int(article_info.get('c', ''))
                    if (newest_first or newest_only) and \
                           is_new_article(journal_name, issue_number, record):
                        # A new article, bucketed separately so it can
                        # be moved to the front (or kept alone) later
                        if ordered_new_records.has_key(order_number):
                            ordered_new_records[order_number].append(record)
                        else:
                            ordered_new_records[order_number] = [record]
                    elif not newest_only:
                        if ordered_records.has_key(order_number):
                            ordered_records[order_number].append(record)
                        else:
                            ordered_records[order_number] = [record]
                else:
                    # No order? No problem! Append it at the end.
                    if newest_first and is_new_article(journal_name, issue_number, record):
                        new_records_without_defined_order.append(record)
                    elif not newest_only:
                        records_without_defined_order.append(record)

    # Append records without order at the end of the list
    if records_without_defined_order:
        if ordered_records:
            ordered_records[max(ordered_records.keys()) + 1] = records_without_defined_order
        else:
            ordered_records[1] = records_without_defined_order

    # Append new records without order at the end of the list of new
    # records
    if new_records_without_defined_order:
        if ordered_new_records:
            ordered_new_records[max(ordered_new_records.keys()) + 1] = new_records_without_defined_order
        else:
            ordered_new_records[1] = new_records_without_defined_order

    # Append new records at the beginning of the list of 'old'
    # records. To do so, use negative integers
    if ordered_new_records:
        highest_new_record_order = max(ordered_new_records.keys())
        for order, new_records in ordered_new_records.iteritems():
            ordered_records[- highest_new_record_order + order - 1] = new_records

    for (order, records) in ordered_records.iteritems():
        # Reverse so that if there are several articles at same
        # positon, newest appear first
        records.reverse()

    return ordered_records
def get_journal_articles(journal_name, issue, category,
                         newest_first=False, newest_only=False):
    """
    Returns the recids in given category and journal, for given issue
    number. The returned recids are grouped according to their 773__c
    field.

    Example of returned value:
                 {'1': [2390, 2386, 2385],
                  '3': [2388],
                  '2': [2389],
                  '4': [2387]}

    Parameters:
        journal_name  -  *str* the name of the journal (as used in URLs)
               issue  -  *str* the issue. Eg: "08/2007"
            category  -  *str* the name of the category
        newest_first  -  *bool* if True, new articles should be placed
                         at beginning of the list. If so, their
                         position/order will be negative integers
         newest_only  -  *bool* if only new articles should be returned
    """
    use_cache = True
    current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
    if issue_is_later_than(issue, current_issue):
        # If we are working on unreleased issue, do not use caching
        # mechanism
        use_cache = False

    if use_cache:
        # Cache hit: only the ordering step is recomputed
        cached_articles = _get_cached_journal_articles(journal_name, issue, category)
        if cached_articles is not None:
            ordered_articles = get_order_dict_from_recid_list(cached_articles,
                                                              journal_name,
                                                              issue,
                                                              newest_first,
                                                              newest_only)
            return ordered_articles

    # Retrieve the list of rules that map Category -> Search Pattern.
    # Keep only the rule matching our category
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    category_to_search_pattern_rules = config_strings["record/rule"]
    try:
        matching_rule = [rule.split(',', 1) for rule in \
                         category_to_search_pattern_rules \
                         if rule.split(',')[0] == category]
    except:
        return []

    # Records of this issue, intersected with the category's pattern
    recids_issue = search_pattern(p='773__n:%s -980:DELETED' % issue)
    recids_rule = search_pattern(p=matching_rule[0][1])
    if issue[0] == '0':
        # search for 09/ and 9/
        recids_issue.union_update(search_pattern(p='773__n:%s -980:DELETED' % issue.lstrip('0')))
    recids_rule.intersection_update(recids_issue)
    # record_exists() == 1 keeps only existing, non-deleted records
    recids = [recid for recid in recids_rule if record_exists(recid) == 1]
    if use_cache:
        _cache_journal_articles(journal_name, issue, category, recids)
    ordered_articles = get_order_dict_from_recid_list(recids,
                                                      journal_name,
                                                      issue,
                                                      newest_first,
                                                      newest_only)

    return ordered_articles
def _cache_journal_articles(journal_name, issue, category, articles):
    """
    Caches given articles IDs.

    Parameters:
        journal_name  -  *str* the name of the journal
               issue  -  *str* the issue. Eg: "08/2007"
            category  -  *str* the name of the category
            articles  -  list of recids to cache for this category

    Returns True on success, False if the cache directory could not be
    created.
    """
    journal_cache_path = get_journal_article_cache_path(journal_name,
                                                       issue)
    # Start from the existing cache content, if any can be read
    try:
        journal_cache_file = open(journal_cache_path, 'r')
        try:
            journal_info = cPickle.load(journal_cache_file)
        finally:
            journal_cache_file.close()
    except (cPickle.PickleError, IOError, EOFError, ValueError):
        # Missing, truncated or corrupted cache: start afresh
        journal_info = {}

    if not journal_info.has_key('journal_articles'):
        journal_info['journal_articles'] = {}
    journal_info['journal_articles'][category] = articles

    # Create cache directory if it does not exist
    journal_cache_dir = os.path.dirname(journal_cache_path)
    if not os.path.exists(journal_cache_dir):
        try:
            os.makedirs(journal_cache_dir)
        except:
            return False

    journal_cache_file = open(journal_cache_path, 'w')
    try:
        cPickle.dump(journal_info, journal_cache_file)
    finally:
        journal_cache_file.close()
    return True
def _get_cached_journal_articles(journal_name, issue, category):
    """
    Retrieve the articles IDs cached for this journal.

    Returns None if cache does not exist or more than 5 minutes old
    """
    # Check if our cache is more or less up-to-date (not more than 5
    # minutes old)
    try:
        journal_cache_path = get_journal_article_cache_path(journal_name,
                                                           issue)
        last_update = os.path.getctime(journal_cache_path)
    except Exception:
        # No cache file (or unreadable path): treat as cache miss
        return None
    now = time.time()
    if (last_update + 5*60) < now:
        return None

    # Get from cache
    try:
        journal_cache_file = open(journal_cache_path, 'r')
        try:
            journal_info = cPickle.load(journal_cache_file)
        finally:
            journal_cache_file.close()
        journal_articles = journal_info.get('journal_articles', {}).get(category, None)
    except (cPickle.PickleError, IOError, EOFError, ValueError):
        # Truncated or corrupted cache: behave as a cache miss
        journal_articles = None
    return journal_articles
def is_new_article(journal_name, issue, recid):
    """
    Check if given article should be considered as new or not.

    New articles are articles that have never appeared in older issues
    than given one.

    Parameters:
        journal_name  -  *str* the name of the journal (unused here,
                         kept for interface compatibility)
               issue  -  *str* the reference issue. Eg: "03/2009"
               recid  -  *int* the record id of the article

    Returns True if no 773__n of the record points to an older issue.
    """
    article_found_in_older_issue = False
    temp_rec = BibFormatObject(recid)
    publication_blocks = temp_rec.fields('773__')
    # The reference issue is loop-invariant: split it once
    this_issue_number, this_issue_year = issue.split('/')
    for publication_block in publication_blocks:
        issue_number, issue_year = publication_block.get('n', '/').split('/', 1)
        if int(issue_year) < int(this_issue_year):
            # Found an older issue
            article_found_in_older_issue = True
            break
        elif int(issue_year) == int(this_issue_year) and \
                 int(issue_number) < int(this_issue_number):
            # Found an older issue
            article_found_in_older_issue = True
            break
    return not article_found_in_older_issue
############################ CATEGORIES RELATED ######################
def get_journal_categories(journal_name, issue=None):
    """
    List the categories for the given journal and issue.

    Returns categories in same order as in config file.

    Parameters:
        journal_name  -  *str* the name of the journal (as used in URLs)
               issue  -  *str* the issue. Eg:'08/2007'. If None, consider
                         all categories defined in journal config
    """
    categories = []
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    # Category order follows the order of 'record/rule' nodes in the
    # journal config.  (The get_current_issue() call that used to be
    # here was dead code -- its result was never used -- and has been
    # removed to save a query.)
    all_categories = [rule.split(',')[0] for rule in \
                      config_strings["record/rule"]]

    if issue is None:
        return all_categories

    # Keep only categories that have at least one article in the issue
    for category in all_categories:
        recids = get_journal_articles(journal_name,
                                      issue,
                                      category)
        if len(recids.keys()) > 0:
            categories.append(category)

    return categories
def get_category_query(journal_name, category):
    """
    Returns the category definition for the given category and journal name

    Parameters:
        journal_name    -  *str* the name of the journal (as used in URLs)
        categoy         -  *str* a category name, as found in the XML config
    """
    rules = get_xml_from_config(["record/rule"], journal_name)["record/rule"]
    try:
        matches = [candidate.split(',', 1)[1].strip() \
                   for candidate in rules \
                   if candidate.split(',')[0] == category]
    except:
        return None
    # Note: an unknown category leaves 'matches' empty and the
    # indexing below raises, exactly as before
    return matches[0]
######################### JOURNAL CONFIG VARS ######################
# In-memory cache of parsed journal config DOMs, keyed by journal name
cached_parsed_xml_config = {}

def get_xml_from_config(nodes, journal_name):
    """
    Returns values from the journal configuration file.

    The needed values can be specified by node name, or by a hierarchy
    of nodes names using '/' as character to mean 'descendant of'.
    Eg. 'record/rule' to get all the values of 'rule' tags inside the
    'record' node

    Returns a dictionary with a key for each query and a list of
    strings (innerXml) results for each key.

    Has a special field "config_fetching_error" that returns an error when
    something has gone wrong.
    """
    # Get and open the config file
    results = {}
    if cached_parsed_xml_config.has_key(journal_name):
        config_file = cached_parsed_xml_config[journal_name]
    else:
        config_path = '%s/webjournal/%s/%s-config.xml' % \
                      (CFG_ETCDIR, journal_name, journal_name)
        config_file = minidom.Document
        try:
            config_file = minidom.parse("%s" % config_path)
        except:
            # todo: raise exception "error: no config file found"
            results["config_fetching_error"] = "could not find config file"
            return results
        else:
            cached_parsed_xml_config[journal_name] = config_file

    for node_path in nodes:
        # Walk down the DOM one path component at a time
        node = config_file
        for node_path_component in node_path.split('/'):
            # pylint: disable=E1103
            # The node variable can be rewritten in the loop and therefore
            # its type can change.
            if node != config_file and node.length > 0:
                # We have a NodeList object: consider only first child
                node = node.item(0)
            # pylint: enable=E1103
            try:
                node = node.getElementsByTagName(node_path_component)
            except:
                # WARNING, config did not have such value
                node = []
                break
        results[node_path] = []

        for result in node:
            try:
                # innerXml of the matched element, UTF-8 encoded
                result_string = result.firstChild.toxml(encoding="utf-8")
            except:
                # WARNING, config did not have such value
                continue
            results[node_path].append(result_string)

    return results
def get_journal_issue_field(journal_name):
    """
    Returns the MARC field in which this journal expects to find
    the issue number. Read this from the journal config file

    Parameters:

        journal_name  -  *str* the name of the journal (as used in URLs)
    """
    config_strings = get_xml_from_config(["issue_number"], journal_name)
    try:
        issue_field = config_strings["issue_number"][0]
    except (KeyError, IndexError):
        # Not defined in config: fall back on the standard MARC field.
        # (Narrowed from a bare except that could hide real errors.)
        issue_field = '773__n'
    return issue_field
def get_journal_css_url(journal_name, type='screen'):
    """
    Returns URL to this journal's CSS.

    Parameters:

        journal_name  -  *str* the name of the journal (as used in URLs)

                type  -  *str* 'screen' or 'print', depending on the kind
                         of CSS
    """
    config_strings = get_xml_from_config([type], journal_name)
    css_path = ''
    try:
        # Look up the requested kind of CSS: the config was fetched
        # with [type], but the result was previously read with the
        # hard-coded key "screen", so type='print' always failed.
        css_path = config_strings[type][0]
    except (KeyError, IndexError):
        register_exception(req=None,
                           suffix="No css file for journal %s. Is this right?" % \
                           journal_name)
    return CFG_SITE_URL + '/' + css_path
def get_journal_submission_params(journal_name):
    """
    Returns the (doctype, identifier element, identifier field) for
    the submission of articles in this journal, so that it is possible
    to build direct submission links.

    Parameter:

        journal_name  -  *str* the name of the journal (as used in URLs)
    """
    # Fetch the three submission-related values with a single parsing
    # of the config (get_xml_from_config() accepts a list of nodes)
    # instead of one call per value.
    config_strings = get_xml_from_config(["submission/doctype",
                                          "submission/identifier_element",
                                          "submission/identifier_field"],
                                         journal_name)
    doctype = ''
    if config_strings.get('submission/doctype', ''):
        doctype = config_strings['submission/doctype'][0]

    identifier_element = ''
    if config_strings.get('submission/identifier_element', ''):
        identifier_element = config_strings['submission/identifier_element'][0]

    # Default to the standard report-number field when not configured
    if config_strings.get('submission/identifier_field', ''):
        identifier_field = config_strings['submission/identifier_field'][0]
    else:
        identifier_field = '037__a'

    return (doctype, identifier_element, identifier_field)
def get_journal_draft_keyword_to_remove(journal_name):
    """
    Returns the keyword that should be removed from the article
    metadata in order to move the article from Draft to Ready
    """
    config_strings = get_xml_from_config(["draft_keyword"], journal_name)
    keywords = config_strings.get('draft_keyword', '')
    if not keywords:
        return ''
    return keywords[0]
def get_journal_alert_sender_email(journal_name):
    """
    Returns the email address that should be used as send of the alert
    email.

    If not specified, use CFG_SITE_SUPPORT_EMAIL
    """
    config_strings = get_xml_from_config(["alert_sender"], journal_name)
    senders = config_strings.get('alert_sender', '')
    if not senders:
        return CFG_SITE_SUPPORT_EMAIL
    return senders[0]
def get_journal_alert_recipient_email(journal_name):
    """
    Returns the default email address of the recipients of the email

    Return a string of comma-separated emails.
    """
    if CFG_DEVEL_SITE:
        # To be on the safe side, do not return the default alert
        # recipients on a development site.
        return ''
    config_strings = get_xml_from_config(["alert_recipients"], journal_name)
    recipients = config_strings.get('alert_recipients', '')
    if not recipients:
        return ''
    return recipients[0]
def get_journal_collection_to_refresh_on_release(journal_name):
    """
    Returns the list of collection to update (WebColl) upon release of
    an issue.

    Only collections known to the search engine collection cache are
    returned.
    """
    from invenio.search_engine import collection_reclist_cache
    config_strings = get_xml_from_config(["update_on_release/collection"], journal_name)
    # 'in' instead of the deprecated dict.has_key()
    return [coll for coll in config_strings.get('update_on_release/collection', []) if \
            coll in collection_reclist_cache.cache]
def get_journal_index_to_refresh_on_release(journal_name):
    """
    Returns the list of indexed to update (BibIndex) upon release of
    an issue.
    """
    from invenio.bibindex_engine import get_index_id_from_index_name
    config_strings = get_xml_from_config(["update_on_release/index"], journal_name)
    candidate_indexes = config_strings.get('update_on_release/index', [])
    # Keep only the indexes that BibIndex actually knows about
    return [index_name for index_name in candidate_indexes \
            if get_index_id_from_index_name(index_name) != '']
def get_journal_template(template, journal_name, ln=CFG_SITE_LANG):
    """
    Returns the journal templates name for the given template type

    Raise an exception if template cannot be found.

    Parameters:

            template  -  *str* the config node holding the template name

        journal_name  -  *str* the name of the journal (as used in URLs)

                  ln  -  *str* language used for the raised error
    """
    from invenio.webjournal_config import \
         InvenioWebJournalTemplateNotFoundError
    config_strings = get_xml_from_config([template], journal_name)
    try:
        index_page_template = 'webjournal' + os.sep + \
                              config_strings[template][0]
    except (KeyError, IndexError):
        # Template not defined in the config: raise the dedicated
        # error. (Narrowed from a bare except that would also mask
        # unrelated failures.)
        raise InvenioWebJournalTemplateNotFoundError(ln,
                                                     journal_name,
                                                     template)
    return index_page_template
def get_journal_name_intl(journal_name, ln=CFG_SITE_LANG):
    """
    Returns the nice name of the journal, translated if possible
    """
    _ = gettext_set_language(ln)
    config_strings = get_xml_from_config(["niceName"], journal_name)
    nice_names = config_strings.get('niceName', '')
    if not nice_names:
        return ''
    return _(nice_names[0])
def get_journal_languages(journal_name):
    """
    Returns the list of languages defined for this journal
    """
    config_strings = get_xml_from_config(["languages"], journal_name)
    languages_values = config_strings.get('languages', '')
    if not languages_values:
        return []
    # Languages are stored as a single comma-separated string
    return [language.strip() for language in languages_values[0].split(',')]
def get_journal_issue_grouping(journal_name):
    """
    Returns the number of issue that are typically released at the
    same time.

    This is used if every two weeks you release an issue that should
    contains issue of next 2 weeks (eg. at week 16, you relase an
    issue named '16-17/2009')

    This number should help in the admin interface to guess how to
    release the next issue (can be overidden by user).
    """
    config_strings = get_xml_from_config(["issue_grouping"], journal_name)
    grouping_values = config_strings.get('issue_grouping', '')
    if grouping_values:
        issue_grouping = grouping_values[0]
        # Only accept strictly positive integer values from the config
        if issue_grouping.isdigit() and int(issue_grouping) > 0:
            return int(issue_grouping)
    # Default: one issue at a time
    return 1
def get_journal_nb_issues_per_year(journal_name):
    """
    Returns the default number of issues per year for this journal.

    This number should help in the admin interface to guess the next
    issue number (can be overidden by user).
    """
    config_strings = get_xml_from_config(["issues_per_year"], journal_name)
    config_values = config_strings.get('issues_per_year', '')
    if config_values:
        issues_per_year = config_values[0]
        # Only accept strictly positive integer values from the config
        if issues_per_year.isdigit() and int(issues_per_year) > 0:
            return int(issues_per_year)
    # Default: weekly journal
    return 52
def get_journal_preferred_language(journal_name, ln):
    """
    Returns the most adequate language to display the journal, given a
    language.

    Preference order: the requested language, then CFG_SITE_LANG, then
    the first language defined for the journal, then CFG_SITE_LANG.
    """
    languages = get_journal_languages(journal_name)
    if ln in languages:
        return ln
    elif CFG_SITE_LANG in languages:
        return CFG_SITE_LANG
    elif languages:
        # Fall back on the journal's first defined language. (The whole
        # list used to be returned here, which is not a language code.)
        return languages[0]
    else:
        return CFG_SITE_LANG
def get_unreleased_issue_hiding_mode(journal_name):
    """
    Returns how unreleased issue should be treated. Can be one of the
    following string values:

        'future' - only future unreleased issues are hidden. Past
                   unreleased one can be viewed

           'all' - any unreleased issue (past and future) have to be
                   hidden

          'none' - no unreleased issue is hidden
    """
    config_strings = get_xml_from_config(["hide_unreleased_issues"], journal_name)
    config_values = config_strings.get('hide_unreleased_issues', '')
    if config_values and config_values[0] in ('future', 'all', 'none'):
        return config_values[0]
    # Default: be conservative and hide every unreleased issue
    return 'all'
def get_first_issue_from_config(journal_name):
    """
    Returns the first issue as defined from config. This should only
    be useful when no issue have been released.

    If not specified, returns the issue made of current week number
    and year.
    """
    config_strings = get_xml_from_config(["first_issue"], journal_name)
    # .get() + truthiness check instead of has_key(): also avoids an
    # IndexError when the key exists but maps to an empty list.
    first_issues = config_strings.get('first_issue', [])
    if first_issues:
        return first_issues[0]
    return time.strftime("%W/%Y", time.localtime())
######################## TIME / ISSUE FUNCTIONS ######################
def get_current_issue(ln, journal_name):
    """
    Returns the current issue of a journal as a string.

    Current issue is the latest released issue.

    Side effect: if no issue has ever been released, the first issue
    from the journal config is INSERTed into jrnISSUE and returned.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        current_issue = run_sql("""SELECT issue_number
                                   FROM jrnISSUE
                                   WHERE date_released <= NOW()
                                   AND id_jrnJOURNAL=%s
                                   ORDER BY date_released DESC
                                   LIMIT 1""",
                                (journal_id,))[0][0]
    except:
        # Empty result (IndexError): no released issue yet.
        # start the first journal ever
        current_issue = get_first_issue_from_config(journal_name)
        run_sql("""INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display)
                   VALUES(%s, %s, %s)""",
                (journal_id,
                 current_issue,
                 current_issue))
    return current_issue
def get_all_released_issues(journal_name):
    """
    Returns the list of released issue, ordered by release date

    Note that it only includes the issues that are considered as
    released in the DB: it will not for example include articles that
    have been imported in the system but not been released
    """
    journal_id = get_journal_id(journal_name)
    res = run_sql("""SELECT issue_number
                     FROM jrnISSUE
                     WHERE id_jrnJOURNAL = %s
                     AND UNIX_TIMESTAMP(date_released) != 0
                     ORDER BY date_released DESC""",
                  (journal_id,))
    if not res:
        return []
    return [row[0] for row in res]
def get_next_journal_issues(current_issue_number, journal_name, n=2):
    """
    This function suggests the 'n' next issue numbers
    """
    number, year = current_issue_number.split('/', 1)
    number = int(number)
    year = int(year)
    issues_per_year = get_journal_nb_issues_per_year(journal_name)
    next_issues = []
    for i in range(1, n + 1):
        # Issue numbers wrap around at the end of the year; the year is
        # incremented accordingly (// is identical to py2 int '/' here)
        next_number = ((number - 1 + i) % issues_per_year) + 1
        next_year = year + ((number - 1 + i) // issues_per_year)
        next_issues.append(make_issue_number(journal_name,
                                             next_number,
                                             next_year))
    return next_issues
def get_grouped_issues(journal_name, issue_number):
    """
    Returns all the issues grouped with a given one.

    Issues are sorted from the oldest to newest one.
    """
    journal_id = get_journal_id(journal_name, CFG_SITE_LANG)
    issue_display = get_issue_number_display(issue_number, journal_name)
    res = run_sql("""SELECT issue_number
                     FROM jrnISSUE
                     WHERE id_jrnJOURNAL=%s AND issue_display=%s""",
                  (journal_id,
                   issue_display))
    if not res:
        return []
    grouped_issues = [row[0] for row in res]
    grouped_issues.sort(compare_issues)
    return grouped_issues
def compare_issues(issue1, issue2):
    """
    Comparison function for issues.

    Issues are strings of the form 'number/year' (Eg. '06/2007').

    Returns:

        -1 if issue1 is older than issue2
         0 if issues are equal
         1 if issue1 is newer than issue2
    """
    issue1_number, issue1_year = issue1.split('/', 1)
    issue2_number, issue2_year = issue2.split('/', 1)
    issue1_number = int(issue1_number)
    issue1_year = int(issue1_year)
    issue2_number = int(issue2_number)
    issue2_year = int(issue2_year)
    # Compare years first, then issue numbers within the same year.
    # The (a > b) - (a < b) idiom is equivalent to the built-in cmp()
    # but also works on Python 3, where cmp() was removed.
    if issue1_year == issue2_year:
        return (issue1_number > issue2_number) - (issue1_number < issue2_number)
    else:
        return (issue1_year > issue2_year) - (issue1_year < issue2_year)
def issue_is_later_than(issue1, issue2):
    """
    Returns true if issue1 is later than issue2
    """
    number1, year1 = issue1.split('/', 1)
    number2, year2 = issue2.split('/', 1)
    year1 = int(year1)
    year2 = int(year2)
    # Different years: the later year wins. Same year: compare the
    # issue numbers.
    if year1 != year2:
        return year1 > year2
    return int(number1) > int(number2)
def get_issue_number_display(issue_number, journal_name,
                             ln=CFG_SITE_LANG):
    """
    Returns the display string for a given issue number.
    """
    journal_id = get_journal_id(journal_name, ln)
    res = run_sql("""SELECT issue_display
                     FROM jrnISSUE
                     WHERE issue_number=%s
                     AND id_jrnJOURNAL=%s""",
                  (issue_number, journal_id))
    if not res:
        # Not yet released: fall back on the raw issue number
        return issue_number
    return res[0][0]
def make_issue_number(journal_name, number, year, for_url_p=False):
    """
    Creates a normalized issue number representation with given issue
    number (as int or str) and year (as int or str).

    Reverse the year and number if for_url_p is True
    """
    issues_per_year = get_journal_nb_issues_per_year(journal_name)
    # Zero-pad the issue number to as many digits as the highest
    # possible issue number of the year (eg. '06' for a weekly journal)
    precision = len(str(issues_per_year))
    padded_number = str(int(str(number))).zfill(precision)
    year = int(str(year))
    if for_url_p:
        return '%i/%s' % (year, padded_number)
    return '%s/%i' % (padded_number, year)
def get_release_datetime(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Gets the date at which an issue was released from the DB.

    Returns None if issue has not yet been released.

    See issue_to_datetime() to get the *theoretical* release time of an
    issue.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        release_date = run_sql("""SELECT date_released
                                  FROM jrnISSUE
                                  WHERE issue_number=%s
                                  AND id_jrnJOURNAL=%s""",
                               (issue, journal_id))[0][0]
    except IndexError:
        # Issue not in the database at all.
        # (Narrowed from a bare except that would also mask DB errors.)
        return None
    # date_released may be NULL for imported-but-unreleased issues
    if release_date:
        return release_date
    else:
        return None
def get_announcement_datetime(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Get the date at which an issue was announced through the alert system.

    Return None if not announced
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        announce_date = run_sql("""SELECT date_announced
                                   FROM jrnISSUE
                                   WHERE issue_number=%s
                                   AND id_jrnJOURNAL=%s""",
                                (issue, journal_id))[0][0]
    except IndexError:
        # Issue not in the database at all.
        # (Narrowed from a bare except that would also mask DB errors.)
        return None
    # date_announced may be NULL if the issue was never announced
    if announce_date:
        return announce_date
    else:
        return None
def datetime_to_issue(issue_datetime, journal_name):
    """
    Returns the issue corresponding to the given datetime object.

    If issue_datetime is too far in the future or in the past, gives
    the best possible matching issue, or None, if it does not seem to
    exist.

    #If issue_datetime is too far in the future, return the latest
    #released issue.
    #If issue_datetime is too far in the past, return None

    Parameters:

        issue_datetime  -  *datetime* date of the issue to be retrieved

          journal_name  -  *str* the name of the journal (as used in URLs)
    """
    issue_number = None
    journal_id = get_journal_id(journal_name)

    # Try to discover how much days an issue is valid
    nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
    this_year_number_of_days = 365
    if calendar.isleap(issue_datetime.year):
        this_year_number_of_days = 366
    issue_day_lifetime = math.ceil(float(this_year_number_of_days)/nb_issues_per_year)

    # Most recent issue released before the given datetime
    res = run_sql("""SELECT issue_number, date_released
                     FROM jrnISSUE
                     WHERE date_released < %s
                     AND id_jrnJOURNAL = %s
                     ORDER BY date_released DESC LIMIT 1""",
                  (issue_datetime, journal_id))
    if res and res[0][1]:
        issue_number = res[0][0]
        issue_release_date = res[0][1]

        # Check that the result is not too far in the future:
        if issue_release_date + datetime.timedelta(issue_day_lifetime) < issue_datetime:
            # In principle, the latest issue will no longer be valid
            # at that time
            return None
    else:
        # Mmh, are we too far in the past? This can happen in the case
        # of articles that have been imported in the system but never
        # considered as 'released' in the database. So we should still
        # try to approximate/match an issue:
        if round(issue_day_lifetime) in [6, 7, 8]:
            # Weekly issues. We can use this information to better
            # match the issue number
            issue_nb = int(issue_datetime.strftime('%W')) # = week number
        else:
            # Compute the number of days since beginning of year, and
            # divide by the lifetime of an issue: we get the
            # approximate issue_number
            issue_nb = math.ceil((int(issue_datetime.strftime('%j')) / issue_day_lifetime))
        # Zero-pad the issue number to the same width as the other
        # issue numbers of this journal
        issue_number = ("%0" + str(len(str(nb_issues_per_year)))+ "i/%i") % (issue_nb, issue_datetime.year)
        # Now check if this issue exists in the system for this
        # journal
        if not get_journal_categories(journal_name, issue_number):
            # This issue did not exist
            return None

    return issue_number
# Granularity constants for issue_to_datetime()
DAILY = 1
WEEKLY = 2
MONTHLY = 3

def issue_to_datetime(issue_number, journal_name, granularity=None):
    """
    Returns the *theoretical* date of release for given issue: useful
    if you release on Friday, but the issue date of the journal
    should correspond to the next Monday.

    This will correspond to the next day/week/month, depending on the
    number of issues per year (or the 'granularity' if specified) and
    the release time (if close to the end of a period defined by the
    granularity, consider next period since release is made a bit in
    advance).

    See get_release_datetime() for the *real* release time of an issue

    THIS FUNCTION SHOULD ONLY BE USED FOR INFORMATIVE DISPLAY PURPOSE,
    AS IT GIVES APPROXIMATIVE RESULTS. Do not use it to make decisions.

    Parameters:

        issue_number  -  *str* issue number to consider

        journal_name  -  *str* the name of the journal (as used in URLs)

         granularity  -  *int* the granularity to consider
                         (DAILY, WEEKLY or MONTHLY)
    """
    # If we have released, we can use this information. Otherwise we
    # have to approximate.
    issue_date = get_release_datetime(issue_number, journal_name)
    if not issue_date:
        # Approximate release date
        number, year = issue_number.split('/')
        number = int(number)
        year = int(year)
        nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
        this_year_number_of_days = 365
        if calendar.isleap(year):
            this_year_number_of_days = 366
        issue_day_lifetime = float(this_year_number_of_days)/nb_issues_per_year
        # Compute from beginning of the year
        issue_date = datetime.datetime(year, 1, 1) + \
                     datetime.timedelta(days=int(round((number - 1) * issue_day_lifetime)))
        # Okay, but if last release is not too far in the past, better
        # compute from the release.
        current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
        current_issue_time = get_release_datetime(current_issue, journal_name)
        # NOTE(review): current_issue_time may be None if the current
        # issue has no release date; the attribute access below would
        # then fail -- confirm this cannot happen here.
        if current_issue_time.year == issue_date.year:
            current_issue_number, current_issue_year = current_issue.split('/')
            current_issue_number = int(current_issue_number)
            # Compute from last release
            issue_date = current_issue_time + \
                         datetime.timedelta(days=int((number - current_issue_number) * issue_day_lifetime))

    # If granularity is not specifed, deduce from config
    if granularity is None:
        nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
        if nb_issues_per_year > 250:
            granularity = DAILY
        elif nb_issues_per_year > 40:
            granularity = WEEKLY
        else:
            granularity = MONTHLY

    # Now we can adapt the date to match the granularity
    if granularity == DAILY:
        if issue_date.hour >= 15:
            # If released after 3pm, consider it is the issue of the next
            # day
            issue_date = issue_date + datetime.timedelta(days=1)
    elif granularity == WEEKLY:
        (year, week_nb, day_nb) = issue_date.isocalendar()
        if day_nb > 4:
            # If released on Fri, Sat or Sun, consider that it is next
            # week's issue.
            issue_date = issue_date + datetime.timedelta(weeks=1)
        # Get first day of the week
        issue_date = issue_date - datetime.timedelta(days=issue_date.weekday())
    else:
        # MONTHLY granularity: normalize to the first day of the month
        if issue_date.day > 22:
            # If released last week of the month, consider release for
            # next month
            # NOTE(review): replace(month=issue_date.month+1) raises
            # ValueError when the month is December -- confirm.
            issue_date = issue_date.replace(month=issue_date.month+1)
        date_string = issue_date.strftime("%Y %m 1")
        issue_date = datetime.datetime(*(time.strptime(date_string, "%Y %m %d")[0:6]))

    return issue_date
def get_number_of_articles_for_issue(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Function that returns a dictionary with all categories and number of
    articles in each category.
    """
    categories = get_journal_categories(journal_name, issue)
    # Map each category of this issue to its article count
    return dict([(category,
                  len(get_journal_articles(journal_name, issue, category))) \
                 for category in categories])
########################## JOURNAL RELATED ###########################
def get_journal_info_path(journal_name):
    """
    Returns the path to the info file of the given journal. The info
    file should be used to get information about a journal when database
    is not available.

    Returns None if path cannot be determined
    """
    # Normalize the path and refuse anything that would escape the
    # webjournal cache dir (eg. '..' components in journal_name)
    info_path = os.path.abspath("%s/webjournal/%s/info.dat" % \
                                (CFG_CACHEDIR, journal_name))
    if not info_path.startswith(CFG_CACHEDIR + '/webjournal/'):
        return None
    return info_path
def get_journal_article_cache_path(journal_name, issue):
    """
    Returns the path to cache file of the articles of a given issue.

    The cache is stored per-year/per-issue:
    <CFG_CACHEDIR>/webjournal/<journal>/<year>/<number>/articles_cache.dat

    Returns None if path cannot be determined
    """
    # This block contained leftover unified-diff '+'/'-' markers that
    # made it invalid Python; the '+' (new) side is applied here.
    issue_number, year = issue.replace('/', '_').split('_', 1)
    # We must make sure we don't try to read outside of webjournal
    # cache dir
    cache_path = os.path.abspath("%s/webjournal/%s/%s/%s/articles_cache.dat" % \
                                 (CFG_CACHEDIR, journal_name,
                                  year, issue_number))
    if cache_path.startswith(CFG_CACHEDIR + '/webjournal/'):
        return cache_path
    else:
        return None
def get_journal_id(journal_name, ln=CFG_SITE_LANG):
    """
    Get the id for this journal from the DB. If DB is down, try to get
    from cache.

    Raises InvenioWebJournalJournalIdNotFoundDBError when the id can be
    found neither in the database nor in the cached info file.
    """
    journal_id = None
    from invenio.webjournal_config import InvenioWebJournalJournalIdNotFoundDBError
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # do not connect to the database as the site is closed for
        # maintenance:
        journal_info_path = get_journal_info_path(journal_name)
        # NOTE(review): get_journal_info_path() may return None, in
        # which case open() raises TypeError, which is not caught
        # below -- confirm.
        try:
            journal_info_file = open(journal_info_path, 'r')
            journal_info = cPickle.load(journal_info_file)
            journal_id = journal_info.get('journal_id', None)
        except cPickle.PickleError, e:
            journal_id = None
        except IOError:
            journal_id = None
        except ValueError:
            journal_id = None
    else:
        try:
            res = run_sql("SELECT id FROM jrnJOURNAL WHERE name=%s",
                          (journal_name,))
            if len(res) > 0:
                journal_id = res[0][0]
        except OperationalError, e:
            # Cannot connect to database. Try to read from cache
            journal_info_path = get_journal_info_path(journal_name)
            try:
                journal_info_file = open(journal_info_path, 'r')
                journal_info = cPickle.load(journal_info_file)
                journal_id = journal_info['journal_id']
            except cPickle.PickleError, e:
                journal_id = None
            except IOError:
                journal_id = None
            except ValueError:
                journal_id = None

    if journal_id is None:
        raise InvenioWebJournalJournalIdNotFoundDBError(ln, journal_name)
    return journal_id
def guess_journal_name(ln, journal_name=None):
    """
    Tries to take a guess what a user was looking for on the server if
    not providing a name for the journal, or if given journal name
    does not match case of original journal.

    Raises InvenioWebJournalNoJournalOnServerError if no journal is
    defined, and InvenioWebJournalNoNameError if no journal matches.
    """
    from invenio.webjournal_config import InvenioWebJournalNoJournalOnServerError
    from invenio.webjournal_config import InvenioWebJournalNoNameError
    journals_id_and_names = get_journals_ids_and_names()
    if len(journals_id_and_names) == 0:
        raise InvenioWebJournalNoJournalOnServerError(ln)

    if not journal_name:
        # No name given: default to the first journal on this server,
        # if its name is known
        if 'journal_name' in journals_id_and_names[0]:
            return journals_id_and_names[0]['journal_name']
        # Previously this fell through to journal_name.lower() on None
        # (AttributeError); report a proper error instead.
        raise InvenioWebJournalNoNameError(ln)

    # Match the given name against known journals, ignoring case
    possible_journal_names = [journal_id_and_name['journal_name'] for journal_id_and_name \
                              in journals_id_and_names \
                              if journal_id_and_name.get('journal_name', '').lower() == journal_name.lower()]
    if possible_journal_names:
        return possible_journal_names[0]
    raise InvenioWebJournalNoNameError(ln)
def get_journals_ids_and_names():
    """
    Returns the list of existing journals IDs and names. Try to read
    from the DB, or from cache if DB is not accessible.

    Each entry is a dict with keys 'journal_id', 'journal_name' and
    (when read from cache) 'current_issue'.
    """
    journals = []

    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # do not connect to the database as the site is closed for
        # maintenance:
        files = os.listdir("%s/webjournal" % CFG_CACHEDIR)
        # NOTE(review): 'files' holds base names only, so the
        # os.path.isdir()/exists() checks below resolve relative to the
        # current working directory, not to the cache dir -- confirm
        # this is intended.
        info_files = [path + os.sep + 'info.dat' for path in files if \
                      os.path.isdir(path) and \
                      os.path.exists(path + os.sep + 'info.dat')]
        for info_file in info_files:
            try:
                journal_info_file = open(info_file, 'r')
                journal_info = cPickle.load(journal_info_file)
                journal_id = journal_info.get('journal_id', None)
                journal_name = journal_info.get('journal_name', None)
                current_issue = journal_info.get('current_issue', None)
                if journal_id is not None and \
                       journal_name is not None:
                    journals.append({'journal_id': journal_id,
                                     'journal_name': journal_name,
                                     'current_issue': current_issue})
            except cPickle.PickleError, e:
                # Well, can't do anything...
                continue
            except IOError:
                # Well, can't do anything...
                continue
            except ValueError:
                continue
    else:
        try:
            res = run_sql("SELECT id, name FROM jrnJOURNAL ORDER BY id")
            for journal_id, journal_name in res:
                journals.append({'journal_id': journal_id,
                                 'journal_name': journal_name})
        except OperationalError, e:
            # Cannot connect to database. Try to read from cache
            files = os.listdir("%s/webjournal" % CFG_CACHEDIR)
            info_files = [path + os.sep + 'info.dat' for path in files if \
                          os.path.isdir(path) and \
                          os.path.exists(path + os.sep + 'info.dat')]
            for info_file in info_files:
                try:
                    journal_info_file = open(info_file, 'r')
                    journal_info = cPickle.load(journal_info_file)
                    journal_id = journal_info.get('journal_id', None)
                    journal_name = journal_info.get('journal_name', None)
                    current_issue = journal_info.get('current_issue', None)
                    if journal_id is not None and \
                           journal_name is not None:
                        journals.append({'journal_id': journal_id,
                                         'journal_name': journal_name,
                                         'current_issue': current_issue})
                except cPickle.PickleError, e:
                    # Well, can't do anything...
                    continue
                except IOError:
                    # Well, can't do anything...
                    continue
                except ValueError:
                    continue

    return journals
def parse_url_string(uri):
    """
    Centralized function to parse any url string given in
    webjournal. Useful to retrieve current category, journal,
    etc. from within format elements

    The webjournal interface handler should already have cleaned the
    URI beforehand, so that journal name exist, issue number is
    correct, etc. The only remaining problem might be due to the
    capitalization of journal name in contact, search and popup pages,
    so clean the journal name. Note that language is also as returned
    from the URL, which might need to be filtered to match available
    languages (WebJournal elements can rely in bfo.lang to retrieve
    washed language)

    returns:
        args: all arguments in dict form
    """
    args = {'journal_name'  : '',
            'issue_year'    : '',
            'issue_number'  : None,
            'issue'         : None,
            'category'      : '',
            'recid'         : -1,
            'verbose'       : 0,
            'ln'            : CFG_SITE_LANG,
            'archive_year'  : None,
            'archive_search': ''}

    if not uri.startswith('/journal'):
        # Mmh, incorrect context. Still, keep language if available
        url_params = urlparse(uri)[4]
        args['ln'] = dict([part.split('=') for part in url_params.split('&') \
                           if len(part.split('=')) == 2]).get('ln', CFG_SITE_LANG)
        return args

    # Take everything after journal and before first question mark
    splitted_uri = uri.split('journal', 1)
    second_part = splitted_uri[1]
    splitted_uri = second_part.split('?')
    uri_middle_part = splitted_uri[0]
    uri_arguments = ''
    if len(splitted_uri) > 1:
        uri_arguments = splitted_uri[1]

    # Parse the query string part ('ln', 'verbose', 'archive_year',
    # 'archive_search', 'name')
    arg_list = uri_arguments.split("&")
    args['ln'] = CFG_SITE_LANG
    args['verbose'] = 0
    for arg_pair in arg_list:
        arg_and_value = arg_pair.split('=')
        if len(arg_and_value) == 2:
            if arg_and_value[0] == 'ln':
                args['ln'] = arg_and_value[1]
            elif arg_and_value[0] == 'verbose' and \
                     arg_and_value[1].isdigit():
                args['verbose'] = int(arg_and_value[1])
            elif arg_and_value[0] == 'archive_year' and \
                     arg_and_value[1].isdigit():
                args['archive_year'] = int(arg_and_value[1])
            elif arg_and_value[0] == 'archive_search':
                args['archive_search'] = arg_and_value[1]
            elif arg_and_value[0] == 'name':
                args['journal_name'] = guess_journal_name(args['ln'],
                                                          arg_and_value[1])

    # Parse the path part: /journal/<name>/<year>/<number>/<category>/<recid>
    arg_list = uri_middle_part.split("/")
    if len(arg_list) > 1 and arg_list[1] not in ['search', 'contact', 'popup']:
        args['journal_name'] = urllib.unquote(arg_list[1])
    elif arg_list[1] not in ['search', 'contact', 'popup']:
        # NOTE(review): when len(arg_list) <= 1 this branch raises
        # IndexError on arg_list[1] -- confirm such URIs cannot reach
        # this point.
        args['journal_name'] = guess_journal_name(args['ln'],
                                                  args['journal_name'])

    cur_issue = get_current_issue(args['ln'], args['journal_name'])
    if len(arg_list) > 2:
        try:
            args['issue_year'] = int(urllib.unquote(arg_list[2]))
        except:
            # Non-numeric year in URL: fall back on current issue year
            args['issue_year'] = int(cur_issue.split('/')[1])
    else:
        # No issue in URL: use the current issue
        args['issue'] = cur_issue
        args['issue_year'] = int(cur_issue.split('/')[1])
        args['issue_number'] = int(cur_issue.split('/')[0])

    if len(arg_list) > 3:
        try:
            args['issue_number'] = int(urllib.unquote(arg_list[3]))
        except:
            # Non-numeric number in URL: fall back on current issue
            args['issue_number'] = int(cur_issue.split('/')[0])
        args['issue'] = make_issue_number(args['journal_name'],
                                          args['issue_number'],
                                          args['issue_year'])

    if len(arg_list) > 4:
        args['category'] = urllib.unquote(arg_list[4])
    if len(arg_list) > 5:
        try:
            args['recid'] = int(urllib.unquote(arg_list[5]))
        except:
            # Ignore non-numeric record IDs; keep the default -1
            pass

    args['ln'] = get_journal_preferred_language(args['journal_name'],
                                                args['ln'])
    # FIXME : wash arguments?
    return args
def make_journal_url(current_uri, custom_parameters=None):
    """
    Create a URL, using the current URI and overriding values
    with the given custom_parameters

    Parameters:

              current_uri  -  *str* the current full URI

        custom_parameters  -  *dict* a dictionary of parameters that
                              should override those of curent_uri
    """
    if not custom_parameters:
        custom_parameters = {}

    # Start from the parameters of the current URI...
    default_params = parse_url_string(current_uri)
    for key, value in custom_parameters.iteritems():
        # Override default params with custom params
        default_params[key] = str(value)

    # Build the path: /journal/<name>/<year>/<number>/<category>/<recid>,
    # each component only when the previous ones are available
    uri = CFG_SITE_URL + '/journal/'
    if default_params['journal_name']:
        uri += urllib.quote(default_params['journal_name']) + '/'
        if default_params['issue_year'] and default_params['issue_number']:
            uri += make_issue_number(default_params['journal_name'],
                                     default_params['issue_number'],
                                     default_params['issue_year'],
                                     for_url_p=True) + '/'
            if default_params['category']:
                uri += urllib.quote(default_params['category'])
                if default_params['recid'] and \
                       default_params['recid'] != -1:
                    uri += '/' + str(default_params['recid'])

    # Append the query string ('ln', 'verbose')
    printed_question_mark = False
    if default_params['ln']:
        uri += '?ln=' + default_params['ln']
        printed_question_mark = True
    if default_params['verbose'] != 0:
        if printed_question_mark:
            uri += '&verbose=' + str(default_params['verbose'])
        else:
            uri += '?verbose=' + str(default_params['verbose'])

    return uri
############################ HTML CACHING FUNCTIONS ############################
def cache_index_page(html, journal_name, category, issue, ln):
"""
Caches the index page main area of a Bulletin
(right hand menu cannot be cached)
+ @return: tuple (path to cache file (or None), message)
"""
issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
- cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \
+ cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/index_%s_%s.html' % \
(CFG_CACHEDIR, journal_name,
- issue, category,
+ year, issue_number, category,
ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop caching
- return False
+ return (None, 'Trying to cache at wrong location: %s' % cache_path)
+
+ cache_path_dir = os.path.dirname(cache_path)
+ try:
+ if not os.path.isdir(cache_path_dir):
+ os.makedirs(cache_path_dir)
+ cached_file = open(cache_path, "w")
+ cached_file.write(html)
+ cached_file.close()
+ except Exception, e:
+ register_exception(req=None,
+ prefix="Could not store index page cache",
+ alert_admin=True)
+ return (None, e)
- cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name)
- if not os.path.isdir(cache_path_dir):
- os.makedirs(cache_path_dir)
- cached_file = open(cache_path, "w")
- cached_file.write(html)
- cached_file.close()
+ return (cache_path, '')
def get_index_page_from_cache(journal_name, category, issue, ln):
"""
Function to get an index page from the cache.
False if not in cache.
"""
issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
- cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \
+ cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/index_%s_%s.html' % \
(CFG_CACHEDIR, journal_name,
- issue, category, ln))
+ year, issue_number, category, ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop reading cache
return False
try:
cached_file = open(cache_path).read()
except:
return False
return cached_file
def cache_article_page(html, journal_name, category, recid, issue, ln):
"""
Caches an article view of a journal.
+
+ If cache cannot be written, a warning is reported to the admin.
+ @return: tuple (path to cache file (or None), message)
"""
issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
- cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \
+ cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/article_%s_%s_%s.html' % \
(CFG_CACHEDIR, journal_name,
- issue, category, recid, ln))
+ year, issue_number, category, recid, ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop caching
- return
- cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name)
- if not os.path.isdir(cache_path_dir):
- os.makedirs(cache_path_dir)
- cached_file = open(cache_path, "w")
- cached_file.write(html)
- cached_file.close()
+ return (None, 'Trying to cache at wrong location: %s' % cache_path)
+
+ cache_path_dir = os.path.dirname(cache_path)
+ try:
+ if not os.path.isdir(cache_path_dir):
+ os.makedirs(cache_path_dir)
+ cached_file = open(cache_path, "w")
+ cached_file.write(html)
+ cached_file.close()
+ except Exception, e:
+ register_exception(req=None,
+ prefix="Could not store article cache",
+ alert_admin=True)
+ return (None, str(e))
+
+ return (cache_path, '')
NOT_FOR_ALERT_COMMENTS_RE = re.compile('.*?', re.IGNORECASE | re.DOTALL)
def get_article_page_from_cache(journal_name, category, recid, issue, ln, bfo=None):
"""
Gets an article view of a journal from cache.
False if not in cache.
"""
issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
- cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \
+ cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/article_%s_%s_%s.html' % \
(CFG_CACHEDIR, journal_name,
- issue, category, recid, ln))
+ year, issue_number, category, recid, ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop reading cache
return False
try:
cached_file = open(cache_path).read()
except:
return False
if CFG_CERN_SITE and bfo:
try:
from invenio.bibformat_elements import bfe_webjournal_cern_toolbar
cached_file = NOT_FOR_ALERT_COMMENTS_RE.sub(bfe_webjournal_cern_toolbar.format_element(bfo), cached_file, 1)
except ImportError, e:
pass
return cached_file
def clear_cache_for_article(journal_name, category, recid, issue):
"""
Resets the cache for an article (e.g. after an article has been
modified)
"""
issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
category = category.replace(" ", "")
cache_path = os.path.abspath('%s/webjournal/%s/' %
(CFG_CACHEDIR, journal_name))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop deleting cache
return False
# try to delete the article cached file
try:
- os.remove('%s/webjournal/%s/%s_article_%s_%s_en.html' %
- (CFG_CACHEDIR, journal_name, issue, category, recid))
+ os.remove('%s/webjournal/%s/%s/%s/article_%s_%s_en.html' %
+ (CFG_CACHEDIR, journal_name, year, issue_number, category, recid))
except:
pass
try:
- os.remove('%s/webjournal/%s/%s_article_%s_%s_fr.html' %
- (CFG_CACHEDIR, journal_name, issue, category, recid))
+ os.remove('%s/webjournal/%s/%s/%s/article_%s_%s_fr.html' %
+ (CFG_CACHEDIR, journal_name, year, issue_number, category, recid))
except:
pass
# delete the index page for the category
try:
- os.remove('%s/webjournal/%s/%s_index_%s_en.html'
- % (CFG_CACHEDIR, journal_name, issue, category))
+ os.remove('%s/webjournal/%s/%s/%s/index_%s_en.html'
+ % (CFG_CACHEDIR, journal_name, year, issue_number, category))
except:
pass
try:
- os.remove('%s/webjournal/%s/%s_index_%s_fr.html'
- % (CFG_CACHEDIR, journal_name, issue, category))
+ os.remove('%s/webjournal/%s/%s/%s/index_%s_fr.html'
+ % (CFG_CACHEDIR, journal_name, year, issue_number, category))
except:
pass
try:
path = get_journal_article_cache_path(journal_name, issue)
os.remove(path)
except:
pass
return True
def clear_cache_for_issue(journal_name, issue):
"""
clears the cache of a whole issue.
"""
issue = issue.replace("/", "_")
- cache_path_dir = os.path.abspath('%s/webjournal/%s' % \
- (CFG_CACHEDIR, journal_name))
+ issue_number, year = issue.split("_", 1)
+
+ cache_path_dir = os.path.abspath('%s/webjournal/%s/%s/%s/' % \
+ (CFG_CACHEDIR, journal_name,
+ year, issue_number))
if not cache_path_dir.startswith(CFG_CACHEDIR + '/webjournal'):
# Mmh, not accessing correct path. Stop deleting cache
return False
all_cached_files = os.listdir(cache_path_dir)
- non_deleted = []
for cached_file in all_cached_files:
- if cached_file.startswith(issue.replace('/', '_')):
- try:
- os.remove(cache_path_dir + '/' + cached_file)
- except:
- return False
- else:
- non_deleted.append(cached_file)
+ try:
+ os.remove(cache_path_dir + '/' + cached_file)
+ except:
+ return False
return True
######################### CERN SPECIFIC FUNCTIONS #################
def get_recid_from_legacy_number(issue_number, category, number):
"""
Returns the recid based on the issue number, category and
'number'.
This is used to support URLs using the now deprecated 'number'
argument. The function tries to reproduce the behaviour of the
old way of doing, even keeping some of its 'problems' (so that we
reach the same article as before with a given number)..
Returns the recid as int, or -1 if not found
"""
recids = []
if issue_number[0] == "0":
alternative_issue_number = issue_number[1:]
recids = list(search_pattern(p='65017a:"%s" and 773__n:%s' %
(category, issue_number)))
recids.extend(list(search_pattern(p='65017a:"%s" and 773__n:%s' %
(category, alternative_issue_number))))
else:
recids = list(search_pattern(p='65017:"%s" and 773__n:%s' %
(category, issue_number)))
# Now must order the records and pick the one at index 'number'.
# But we have to take into account that there can be multiple
# records at position 1, and that these additional records should
# be numbered with negative numbers:
# 1, 1, 1, 2, 3 -> 1, -1, -2, 2, 3...
negative_index_records = {}
positive_index_records = {}
# Fill in 'negative_index_records' and 'positive_index_records'
# lists with the following loop
for recid in recids:
bfo = BibFormatObject(recid)
order = [subfield['c'] for subfield in bfo.fields('773__') if \
issue_number in subfield.get('n', '')]
if len(order) > 0:
# If several orders are defined for the same article and
# the same issue, keep the first one
order = order[0]
if order.isdigit():
# Order must be an int. Otherwise skip
order = int(order)
if order == 1 and positive_index_records.has_key(1):
# This is then a negative number for this record
index = (len(negative_index_records.keys()) > 0 and \
min(negative_index_records.keys()) -1) or 0
negative_index_records[index] = recid
else:
# Positive number for this record
if not positive_index_records.has_key(order):
positive_index_records[order] = recid
else:
# We make the assumption that we cannot have
# twice the same position for two
# articles. Previous WebJournal module was not
# clear about that. Just drop this record
# (better than crashing or looping forever..)
pass
recid_to_return = -1
# Ok, we can finally pick the recid corresponding to 'number'
if number <= 0:
negative_indexes = negative_index_records.keys()
negative_indexes.sort()
negative_indexes.reverse()
if len(negative_indexes) > abs(number):
recid_to_return = negative_index_records[negative_indexes[abs(number)]]
else:
if positive_index_records.has_key(number):
recid_to_return = positive_index_records[number]
return recid_to_return
def is_recid_in_released_issue(recid):
"""
Returns True if recid is part of the latest issue of the given
journal.
WARNING: the function does not check that the article does not
belong to the draft collection of the record. This is wanted, in
order to workaround the time needed for a record to go from the
draft collection to the final collection
"""
bfo = BibFormatObject(recid)
journal_name = ''
journal_names = [journal_name for journal_name in bfo.fields('773__t') if journal_name]
if journal_names:
journal_name = journal_names[0]
else:
return False
existing_journal_names = [o['journal_name'] for o in get_journals_ids_and_names()]
if not journal_name in existing_journal_names:
# Try to remove whitespace
journal_name = journal_name.replace(' ', '')
if not journal_name in existing_journal_names:
# Journal name unknown from WebJournal
return False
config_strings = get_xml_from_config(["draft_image_access_policy"], journal_name)
if config_strings['draft_image_access_policy'] and \
config_strings['draft_image_access_policy'][0] != 'allow':
# The journal does not want to optimize access to images
return False
article_issues = bfo.fields('773__n')
current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
for article_issue in article_issues:
# Check each issue until a released one is found
if get_release_datetime(article_issue, journal_name):
# Release date exists, issue has been released
return True
else:
# Unreleased issue. Do we still allow based on journal config?
unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name)
if (unreleased_issues_mode == 'none' or \
(unreleased_issues_mode == 'future' and \
not issue_is_later_than(article_issue, current_issue))):
return True
return False
diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py
index 5ce9efad5..0551bc0be 100644
--- a/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py
+++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py
@@ -1,289 +1,299 @@
# -*- coding: utf-8 -*-
## $Id: bfe_webjournal_widget_whatsNew.py,v 1.24 2009/01/27 07:25:12 jerome Exp $
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
WebJournal widget - Display the index of the lastest articles,
including 'breaking news'.
"""
import time
import os
from invenio.search_engine import search_pattern, record_exists
from invenio.bibformat_engine import BibFormatObject
from invenio.config import \
CFG_SITE_URL, \
CFG_CACHEDIR, \
CFG_ACCESS_CONTROL_LEVEL_SITE
from invenio.webjournal_utils import \
parse_url_string, \
make_journal_url, \
get_journal_info_path, \
get_journal_categories, \
get_journal_articles, \
get_current_issue
from invenio.messages import gettext_set_language
+from invenio.errorlib import register_exception
def format_element(bfo, latest_issue_only='yes', newest_articles_only='yes',
link_category_headers='yes', display_categories='', hide_when_only_new_records="no"):
"""
Display the index to the newest articles (of the latest issue, or of the displayed issue)
@param latest_issue_only: if 'yes', always display articles of the latest issue, even if viewing a past issue
@param newest_articles_only: only display new articles, not those that also appeared in previous issues
@param link_category_headers: if yes, category headers link to index page of that category
@param display_categories: comma-separated list of categories to display. If none, display all
@param hide_when_only_new_records: if 'yes' display new articles only if old articles exist in this issue
"""
args = parse_url_string(bfo.user_info['uri'])
journal_name = args["journal_name"]
ln = bfo.lang
_ = gettext_set_language(ln)
if latest_issue_only.lower() == 'yes':
issue_number = get_current_issue(bfo.lang, journal_name)
else:
issue_number = args["issue"]
# Try to get HTML from cache
if args['verbose'] == 0:
cached_html = _get_whatsNew_from_cache(journal_name, issue_number, ln)
if cached_html:
return cached_html
# No cache? Build from scratch
# 1. Get the articles
journal_categories = get_journal_categories(journal_name,
issue_number)
if display_categories:
display_categories = display_categories.lower().split(',')
journal_categories = [category for category in journal_categories \
if category.lower() in display_categories]
whats_new_articles = {}
for category in journal_categories:
whats_new_articles[category] = get_journal_articles(journal_name,
issue_number,
category,
newest_only=newest_articles_only.lower() == 'yes')
# Do we want to display new articles only if they have been added
# to an issue that contains non-new records?
if hide_when_only_new_records.lower() == "yes":
# First gather all articles in this issue
all_whats_new_articles = {}
for category in journal_categories:
all_whats_new_articles[category] = get_journal_articles(journal_name,
issue_number,
category,
newest_first=True,
newest_only=False)
# Then check if we have some articles at position > -1
has_old_articles = False
for articles in all_whats_new_articles.values():
if len([order for order in articles.keys() if order > -1]) > 0:
has_old_articles = True
break
if not has_old_articles:
# We don't have old articles? Thend don't consider any
for category in journal_categories:
whats_new_articles[category] = {}
# 2. Build the HTML
html_out = _get_breaking_news(ln, journal_name)
for category in journal_categories:
articles_in_category = whats_new_articles[category]
html_articles_in_category = ""
# Generate the list of articles in this category
order_numbers = articles_in_category.keys()
order_numbers.sort()
for order in order_numbers:
articles = articles_in_category[order]
for recid in articles:
link = make_journal_url(bfo.user_info['uri'], {'journal_name': journal_name,
'issue_number': issue_number.split('/')[0],
'issue_year': issue_number.split('/')[1],
'category': category,
'recid': recid,
'ln': bfo.lang})
temp_rec = BibFormatObject(recid)
if ln == 'fr':
try:
title = temp_rec.fields('246_1a')[0]
except:
try:
title = temp_rec.field('245__a')
except:
continue
else:
try:
title = temp_rec.field('245__a')
except:
continue
try:
html_articles_in_category += '%s' % \
(link, title)
except:
pass
if html_articles_in_category:
# Good, we found some new articles for this category.
# Then insert the genereated results into a larger list
# with category as "parent".
html_out += ''
if link_category_headers.lower() == 'yes':
html_out += '%s' % _(category)
else:
html_out += '%s' % _(category)
html_out += ''
html_out += html_articles_in_category
html_out += '
'
if not html_out:
html_out = '' + _('There are no new articles for the moment') + ''
else:
html_out = ''
if args['verbose'] == 0:
cache_whatsNew(html_out, journal_name, issue_number, ln)
return html_out
def _get_breaking_news(lang, journal_name):
"""
Gets the 'Breaking News' articles that are currently active according to
start and end dates.
"""
# CERN Bulletin only
if not journal_name.lower() == 'cernbulletin':
return ''
# Look for active breaking news
breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \
if record_exists(recid) == 1]
today = time.mktime(time.localtime())
breaking_news = ""
for recid in breaking_news_recids:
temp_rec = BibFormatObject(recid)
try:
end_date = time.mktime(time.strptime(temp_rec.field("925__b"),
"%m/%d/%Y"))
except:
end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y"))
if end_date < today:
continue
try:
start_date = time.mktime(time.strptime(temp_rec.field("925__a"),
"%m/%d/%Y"))
except:
start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y"))
if start_date > today:
continue
publish_date = temp_rec.field("269__c")
if lang == 'fr':
title = temp_rec.field("246_1a")
else:
title = temp_rec.field("245__a")
breaking_news += '''
''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title)
if breaking_news:
breaking_news = '%s' % breaking_news
return breaking_news
def _get_whatsNew_from_cache(journal_name, issue, ln):
"""
Try to get the "whats new" box from cache.
"""
- cache_path = os.path.abspath('%s/webjournal/%s/%s_whatsNew_%s.html' % \
+ issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
+ cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' % \
(CFG_CACHEDIR,
journal_name,
- issue.replace('/','_'),
+ year, issue_number,
ln))
if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Make sure we are reading from correct directory (you
# know, in case there are '../../' inside journal name..)
return False
try:
last_update = os.path.getctime(cache_path)
except:
return False
try:
# Get last journal update, based on journal info file last
# modification time
journal_info_path = get_journal_info_path(journal_name)
last_journal_update = os.path.getctime(journal_info_path)
except:
return False
now = time.time()
if ((last_update + 30*60) < now) or \
(last_journal_update > last_update):
# invalidate after 30 minutes or if last journal release is
# newer than cache
return False
try:
cached_file = open(cache_path).read()
except:
return False
return cached_file
def cache_whatsNew(html, journal_name, issue, ln):
"""
caches the whats new box for 30 minutes.
"""
if not CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
- cache_path = os.path.abspath('%s/webjournal/%s/%s_whatsNew_%s.html' % \
+ issue = issue.replace("/", "_")
+ issue_number, year = issue.split("_", 1)
+ cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' % \
(CFG_CACHEDIR,
journal_name,
- issue.replace('/','_'),
+ year, issue_number,
ln))
if cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
# Do not try to cache if the journal name led us to some
# other directory ('../../' inside journal name for
# example)
- cache_dir = CFG_CACHEDIR + '/webjournal/' + journal_name
- if not os.path.isdir(cache_dir):
- os.makedirs(cache_dir)
- cache_file = file(cache_path, "w")
- cache_file.write(html)
- cache_file.close()
+ try:
+ cache_dir = os.path.dirname(cache_path)
+ if not os.path.isdir(cache_dir):
+ os.makedirs(cache_dir)
+ cache_file = file(cache_path, "w")
+ cache_file.write(html)
+ cache_file.close()
+ except Exception:
+ register_exception(req=None,
+ prefix="Could not store 'Whats new' section",
+ alert_admin=True)
def escape_values(bfo):
"""
Called by BibFormat in order to check if output of this element
should be escaped.
"""
return 0
_ = gettext_set_language('en')
dummy = _("What's new")
diff --git a/modules/websession/lib/inveniogc.py b/modules/websession/lib/inveniogc.py
index c6d8ac678..b46d39fa2 100644
--- a/modules/websession/lib/inveniogc.py
+++ b/modules/websession/lib/inveniogc.py
@@ -1,631 +1,633 @@
## -*- mode: python; coding: utf-8; -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Invenio garbage collector.
"""
__revision__ = "$Id$"
import sys
import datetime
import time
import os
try:
from invenio.dbquery import run_sql, wash_table_column_name
from invenio.config import CFG_LOGDIR, CFG_TMPDIR, CFG_CACHEDIR, \
CFG_TMPSHAREDDIR, CFG_WEBSEARCH_RSS_TTL, CFG_PREFIX, \
CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS
from invenio.bibtask import task_init, task_set_option, task_get_option, \
write_message, write_messages
from invenio.access_control_mailcookie import mail_cookie_gc
from invenio.bibdocfile import BibDoc
from invenio.bibsched import gc_tasks
from invenio.websubmit_config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX
from invenio.dateutils import convert_datestruct_to_datetext
except ImportError, e:
print "Error: %s" % (e,)
sys.exit(1)
# configure variables
CFG_MYSQL_ARGUMENTLIST_SIZE = 100
# After how many days to remove obsolete log/err files
CFG_MAX_ATIME_RM_LOG = 28
# After how many days to zip obsolete log/err files
CFG_MAX_ATIME_ZIP_LOG = 7
# After how many days to remove obsolete bibreformat fmt xml files
CFG_MAX_ATIME_RM_FMT = 28
# After how many days to zip obsolete bibreformat fmt xml files
CFG_MAX_ATIME_ZIP_FMT = 7
# After how many days to remove obsolete oaiharvest fmt xml files
CFG_MAX_ATIME_RM_OAI = 14
# After how many days to zip obsolete oaiharvest fmt xml files
CFG_MAX_ATIME_ZIP_OAI = 3
# After how many days to remove deleted bibdocs
CFG_DELETED_BIBDOC_MAXLIFE = 365 * 10
# After how many day to remove old cached webjournal files
CFG_WEBJOURNAL_TTL = 7
# After how many days to zip obsolete bibsword xml log files
CFG_MAX_ATIME_ZIP_BIBSWORD = 7
# After how many days to remove obsolete bibsword xml log files
CFG_MAX_ATIME_RM_BIBSWORD = 28
# After how many days to remove temporary video uploads
CFG_MAX_ATIME_WEBSUBMIT_TMP_VIDEO = 3
# After how many days to remove obsolete refextract xml output files
CFG_MAX_ATIME_RM_REFEXTRACT = 28
# After how many days to remove obsolete bibdocfiles temporary files
CFG_MAX_ATIME_RM_BIBDOC = 4
# After how many days to remove obsolete WebSubmit-created temporary
# icon files
CFG_MAX_ATIME_RM_ICON = 7
# After how many days to remove obsolete WebSubmit-created temporary
# stamp files
CFG_MAX_ATIME_RM_STAMP = 7
# After how many days to remove obsolete WebJournal-created update XML
CFG_MAX_ATIME_RM_WEBJOURNAL_XML = 7
# After how many days to remove obsolete temporary files attached with
# the CKEditor in WebSubmit context?
CFG_MAX_ATIME_RM_WEBSUBMIT_CKEDITOR_FILE = 28
# After how many days to remove obsolete temporary files related to BibEdit
# cache
CFG_MAX_ATIME_BIBEDIT_TMP = 3
def gc_exec_command(command):
""" Exec the command logging in appropriate way its output."""
write_message(' %s' % command, verbose=9)
(dummy, output, errors) = os.popen3(command)
write_messages(errors.read())
write_messages(output.read())
def clean_logs():
""" Clean the logs from obsolete files. """
write_message("""CLEANING OF LOG FILES STARTED""")
write_message("- deleting/gzipping bibsched empty/old err/log "
"BibSched files")
vstr = task_get_option('verbose') > 1 and '-v' or ''
gc_exec_command('find %s -name "bibsched_task_*"'
' -size 0c -exec rm %s -f {} \;' \
% (CFG_LOGDIR, vstr))
gc_exec_command('find %s -name "bibsched_task_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_LOGDIR, CFG_MAX_ATIME_RM_LOG, vstr))
gc_exec_command('find %s -name "bibsched_task_*"'
' -atime +%s -exec gzip %s -9 {} \;' \
% (CFG_LOGDIR, CFG_MAX_ATIME_ZIP_LOG, vstr))
write_message("""CLEANING OF LOG FILES FINISHED""")
def clean_tempfiles():
""" Clean old temporary files. """
write_message("""CLEANING OF TMP FILES STARTED""")
write_message("- deleting/gzipping temporary empty/old "
"BibReformat xml files")
vstr = task_get_option('verbose') > 1 and '-v' or ''
gc_exec_command('find %s %s -name "rec_fmt_*"'
' -size 0c -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, vstr))
gc_exec_command('find %s %s -name "rec_fmt_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_FMT, vstr))
gc_exec_command('find %s %s -name "rec_fmt_*"'
' -atime +%s -exec gzip %s -9 {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_ZIP_FMT, vstr))
write_message("- deleting/gzipping temporary old "
"OAIHarvest xml files")
gc_exec_command('find %s %s -name "oaiharvestadmin.*"'
' -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, vstr))
gc_exec_command('find %s %s -name "bibconvertrun.*"'
' -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, vstr))
# Using mtime and -r here to include directories.
gc_exec_command('find %s %s -name "oaiharvest*"'
' -mtime +%s -exec gzip %s -9 {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_ZIP_OAI, vstr))
gc_exec_command('find %s %s -name "oaiharvest*"'
' -mtime +%s -exec rm %s -rf {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_OAI, vstr))
gc_exec_command('find %s %s -name "oai_archive*"'
' -mtime +%s -exec rm %s -rf {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_OAI, vstr))
write_message("- deleting/gzipping temporary old "
"BibSword files")
gc_exec_command('find %s %s -name "bibsword_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_BIBSWORD, vstr))
gc_exec_command('find %s %s -name "bibsword_*"'
' -atime +%s -exec gzip %s -9 {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_ZIP_BIBSWORD, vstr))
# DELETE ALL FILES CREATED DURING VIDEO SUBMISSION
write_message("- deleting old video submissions")
gc_exec_command('find %s -name %s* -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPSHAREDDIR, CFG_WEBSUBMIT_TMP_VIDEO_PREFIX,
CFG_MAX_ATIME_WEBSUBMIT_TMP_VIDEO, vstr))
write_message("- deleting temporary old "
"RefExtract files")
gc_exec_command('find %s %s -name "refextract*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR,
CFG_MAX_ATIME_RM_REFEXTRACT, vstr))
write_message("- deleting temporary old bibdocfiles")
gc_exec_command('find %s %s -name "bibdocfile_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_BIBDOC, vstr))
write_message("- deleting old temporary WebSubmit icons")
gc_exec_command('find %s %s -name "websubmit_icon_creator_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_ICON, vstr))
write_message("- deleting old temporary WebSubmit stamps")
gc_exec_command('find %s %s -name "websubmit_file_stamper_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_STAMP, vstr))
write_message("- deleting old temporary WebJournal XML files")
gc_exec_command('find %s %s -name "webjournal_publish_*"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPDIR, CFG_TMPSHAREDDIR, \
CFG_MAX_ATIME_RM_WEBJOURNAL_XML, vstr))
write_message("- deleting old temporary files attached with CKEditor")
gc_exec_command('find %s/var/tmp/attachfile/ '
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_PREFIX, CFG_MAX_ATIME_RM_WEBSUBMIT_CKEDITOR_FILE,
vstr))
write_message("- deleting old temporary files attached with BibEdit")
gc_exec_command('find %s -name "bibedit*.tmp"'
' -atime +%s -exec rm %s -f {} \;' \
% (CFG_TMPSHAREDDIR + '/bibedit-cache/', CFG_MAX_ATIME_BIBEDIT_TMP,
vstr))
write_message("""CLEANING OF TMP FILES FINISHED""")
def clean_cache():
"""Clean the cache for expired and old files."""
write_message("""CLEANING OF OLD CACHED RSS REQUEST STARTED""")
rss_cache_dir = "%s/rss/" % CFG_CACHEDIR
try:
filenames = os.listdir(rss_cache_dir)
except OSError:
filenames = []
count = 0
for filename in filenames:
filename = os.path.join(rss_cache_dir, filename)
last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(filename)).st_mtime)
if not (datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL)):
try:
os.remove(filename)
count += 1
except OSError, e:
write_message("Error: %s" % e)
write_message("""%s rss cache file pruned out of %s.""" % (count, len(filenames)))
write_message("""CLEANING OF OLD CACHED RSS REQUEST FINISHED""")
write_message("""CLEANING OF OLD CACHED WEBJOURNAL FILES STARTED""")
webjournal_cache_dir = "%s/webjournal/" % CFG_CACHEDIR
+ filenames = []
try:
- filenames = os.listdir(webjournal_cache_dir)
+ for root, dummy, files in os.walk(webjournal_cache_dir):
+ filenames.extend(os.path.join(root, filename) for filename in files)
except OSError:
- filenames = []
+ pass
count = 0
for filename in filenames:
filename = os.path.join(webjournal_cache_dir, filename)
last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(filename)).st_mtime)
if not (datetime.datetime.now() < last_update_time + datetime.timedelta(days=CFG_WEBJOURNAL_TTL)):
try:
os.remove(filename)
count += 1
except OSError, e:
write_message("Error: %s" % e)
write_message("""%s webjournal cache file pruned out of %s.""" % (count, len(filenames)))
write_message("""CLEANING OF OLD CACHED WEBJOURNAL FILES FINISHED""")
def clean_bibxxx():
"""
Clean unreferenced bibliographic values from bibXXx tables.
This is useful to prettify browse results, as it removes
old, no longer used values.
WARNING: this function must be run only when no bibupload is
running and/or sleeping.
"""
write_message("""CLEANING OF UNREFERENCED bibXXx VALUES STARTED""")
for xx in range(0, 100):
bibxxx = 'bib%02dx' % xx
bibrec_bibxxx = 'bibrec_bib%02dx' % xx
if task_get_option('verbose') >= 9:
num_unref_values = run_sql("""SELECT COUNT(*) FROM %(bibxxx)s
LEFT JOIN %(bibrec_bibxxx)s
ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
{'bibxxx': bibxxx,
'bibrec_bibxxx': bibrec_bibxxx, })[0][0]
run_sql("""DELETE %(bibxxx)s FROM %(bibxxx)s
LEFT JOIN %(bibrec_bibxxx)s
ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx
WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \
{'bibxxx': bibxxx,
'bibrec_bibxxx': bibrec_bibxxx, })
if task_get_option('verbose') >= 9:
write_message(""" - %d unreferenced %s values cleaned""" % \
(num_unref_values, bibxxx))
write_message("""CLEANING OF UNREFERENCED bibXXx VALUES FINISHED""")
def clean_documents():
"""Delete all the bibdocs that have been set as deleted and have not been
modified since CFG_DELETED_BIBDOC_MAXLIFE days. Returns the number of
bibdocs involved."""
write_message("""CLEANING OF OBSOLETED DELETED DOCUMENTS STARTED""")
write_message("select id from bibdoc where status='DELETED' and NOW()>ADDTIME(modification_date, '%s 0:0:0')" % CFG_DELETED_BIBDOC_MAXLIFE, verbose=9)
records = run_sql("select id from bibdoc where status='DELETED' and NOW()>ADDTIME(modification_date, '%s 0:0:0')", (CFG_DELETED_BIBDOC_MAXLIFE,))
for record in records:
bibdoc = BibDoc.create_instance(record[0])
bibdoc.expunge()
write_message("DELETE FROM bibdoc WHERE id=%i" % int(record[0]), verbose=9)
run_sql("DELETE FROM bibdoc WHERE id=%s", (record[0],))
write_message("""%s obsoleted deleted documents cleaned""" % len(records))
write_message("""CLEANING OF OBSOLETED DELETED DOCUMENTS FINISHED""")
return len(records)
def check_tables():
"""
Check all DB tables. Useful to run from time to time when the
site is idle, say once a month during a weekend night.
FIXME: should produce useful output about outcome.
"""
res = run_sql("SHOW TABLES")
for row in res:
table_name = row[0]
write_message("checking table %s" % table_name)
run_sql("CHECK TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql
def optimise_tables():
    """
    Run OPTIMIZE TABLE over all DB tables to defragment them in order to
    increase DB performance. Useful to run from time to time when the
    site is idle, say once a month during a weekend night.
    FIXME: should produce useful output about outcome.
    """
    for (tbl,) in run_sql("SHOW TABLES"):
        write_message("optimising table %s" % tbl)
        # Table name is sanitised since it cannot be bound as a parameter.
        run_sql("OPTIMIZE TABLE %s" % wash_table_column_name(tbl)) # kwalitee: disable=sql
def guest_user_garbage_collector():
"""Session Garbage Collector
program flow/tasks:
1: delete expired sessions
1b:delete guest users without session
2: delete queries not attached to any user
3: delete baskets not attached to any user
4: delete alerts not attached to any user
5: delete expired mailcookies
5b: delete expired not confirmed email address
6: delete expired roles memberships
verbose - level of program output.
0 - nothing
1 - default
9 - max, debug"""
# dictionary used to keep track of number of deleted entries
delcount = {'session': 0,
'user': 0,
'user_query': 0,
'query': 0,
'bskBASKET': 0,
'user_bskBASKET': 0,
'bskREC': 0,
'bskRECORDCOMMENT': 0,
'bskEXTREC': 0,
'bskEXTFMT': 0,
'user_query_basket': 0,
'mail_cookie': 0,
'email_addresses': 0,
'role_membership' : 0}
write_message("CLEANING OF GUEST SESSIONS STARTED")
# 1 - DELETE EXPIRED SESSIONS
write_message("- deleting expired sessions")
timelimit = convert_datestruct_to_datetext(time.gmtime())
write_message(" DELETE FROM session WHERE"
" session_expiry < %s \n" % (timelimit,), verbose=9)
delcount['session'] += run_sql("DELETE FROM session WHERE"
" session_expiry < %s """, (timelimit,))
# 1b - DELETE GUEST USERS WITHOUT SESSION
write_message("- deleting guest users without session")
# get uids
write_message(""" SELECT u.id\n FROM user AS u LEFT JOIN session AS s\n ON u.id = s.uid\n WHERE s.uid IS NULL AND u.email = ''""", verbose=9)
result = run_sql("""SELECT u.id
FROM user AS u LEFT JOIN session AS s
ON u.id = s.uid
WHERE s.uid IS NULL AND u.email = ''""")
write_message(result, verbose=9)
if result:
# work on slices of result list in case of big result
for i in range(0, len(result), CFG_MYSQL_ARGUMENTLIST_SIZE):
# create string of uids
uidstr = ''
for (id_user,) in result[i:i + CFG_MYSQL_ARGUMENTLIST_SIZE]:
if uidstr: uidstr += ','
uidstr += "%s" % (id_user,)
# delete users
write_message(" DELETE FROM user WHERE"
" id IN (TRAVERSE LAST RESULT) AND email = '' \n", verbose=9)
delcount['user'] += run_sql("DELETE FROM user WHERE"
" id IN (%s) AND email = ''" % (uidstr,))
# 2 - DELETE QUERIES NOT ATTACHED TO ANY USER
# first step, delete from user_query
write_message("- deleting user_queries referencing"
" non-existent users")
# find user_queries referencing non-existent users
write_message(" SELECT DISTINCT uq.id_user\n"
" FROM user_query AS uq LEFT JOIN user AS u\n"
" ON uq.id_user = u.id\n WHERE u.id IS NULL", verbose=9)
result = run_sql("""SELECT DISTINCT uq.id_user
FROM user_query AS uq LEFT JOIN user AS u
ON uq.id_user = u.id
WHERE u.id IS NULL""")
write_message(result, verbose=9)
# delete in user_query one by one
write_message(" DELETE FROM user_query WHERE"
" id_user = 'TRAVERSE LAST RESULT' \n", verbose=9)
for (id_user,) in result:
delcount['user_query'] += run_sql("""DELETE FROM user_query
WHERE id_user = %s""" % (id_user,))
# delete the actual queries
write_message("- deleting queries not attached to any user")
# select queries that must be deleted
write_message(""" SELECT DISTINCT q.id\n FROM query AS q LEFT JOIN user_query AS uq\n ON uq.id_query = q.id\n WHERE uq.id_query IS NULL AND\n q.type <> 'p' """, verbose=9)
result = run_sql("""SELECT DISTINCT q.id
FROM query AS q LEFT JOIN user_query AS uq
ON uq.id_query = q.id
WHERE uq.id_query IS NULL AND
q.type <> 'p'""")
write_message(result, verbose=9)
# delete queries one by one
write_message(""" DELETE FROM query WHERE id = 'TRAVERSE LAST RESULT \n""", verbose=9)
for (id_user,) in result:
delcount['query'] += run_sql("""DELETE FROM query WHERE id = %s""", (id_user,))
# 3 - DELETE BASKETS NOT OWNED BY ANY USER
write_message("- deleting baskets not owned by any user")
# select basket ids
write_message(""" SELECT ub.id_bskBASKET\n FROM user_bskBASKET AS ub LEFT JOIN user AS u\n ON u.id = ub.id_user\n WHERE u.id IS NULL""", verbose=9)
try:
result = run_sql("""SELECT ub.id_bskBASKET
FROM user_bskBASKET AS ub LEFT JOIN user AS u
ON u.id = ub.id_user
WHERE u.id IS NULL""")
except:
result = []
write_message(result, verbose=9)
# delete from user_basket and basket one by one
write_message(""" DELETE FROM user_bskBASKET WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' """, verbose=9)
write_message(""" DELETE FROM bskBASKET WHERE id = 'TRAVERSE LAST RESULT' """, verbose=9)
write_message(""" DELETE FROM bskREC WHERE id_bskBASKET = 'TRAVERSE LAST RESULT'""", verbose=9)
write_message(""" DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' \n""", verbose=9)
for (id_basket,) in result:
delcount['user_bskBASKET'] += run_sql("""DELETE FROM user_bskBASKET WHERE id_bskBASKET = %s""", (id_basket,))
delcount['bskBASKET'] += run_sql("""DELETE FROM bskBASKET WHERE id = %s""", (id_basket,))
delcount['bskREC'] += run_sql("""DELETE FROM bskREC WHERE id_bskBASKET = %s""", (id_basket,))
delcount['bskRECORDCOMMENT'] += run_sql("""DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = %s""", (id_basket,))
write_message(""" SELECT DISTINCT ext.id, rec.id_bibrec_or_bskEXTREC FROM bskEXTREC AS ext \nLEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC WHERE id_bibrec_or_bskEXTREC is NULL""", verbose=9)
try:
result = run_sql("""SELECT DISTINCT ext.id FROM bskEXTREC AS ext
LEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC
WHERE id_bibrec_or_bskEXTREC is NULL""")
except:
result = []
write_message(result, verbose=9)
write_message(""" DELETE FROM bskEXTREC WHERE id = 'TRAVERSE LAST RESULT' """, verbose=9)
write_message(""" DELETE FROM bskEXTFMT WHERE id_bskEXTREC = 'TRAVERSE LAST RESULT' \n""", verbose=9)
for (id_basket,) in result:
delcount['bskEXTREC'] += run_sql("""DELETE FROM bskEXTREC WHERE id=%s""", (id_basket,))
delcount['bskEXTFMT'] += run_sql("""DELETE FROM bskEXTFMT WHERE id_bskEXTREC=%s""", (id_basket,))
# 4 - DELETE ALERTS NOT OWNED BY ANY USER
write_message('- deleting alerts not owned by any user')
# select user ids in uqb that reference non-existent users
write_message("""SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""", verbose=9)
result = run_sql("""SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""")
write_message(result, verbose=9)
# delete all these entries
for (id_user,) in result:
write_message("""DELETE FROM user_query_basket WHERE id_user = 'TRAVERSE LAST RESULT """, verbose=9)
delcount['user_query_basket'] += run_sql("""DELETE FROM user_query_basket WHERE id_user = %s """, (id_user,))
# 5 - delete expired mailcookies
write_message("""mail_cookie_gc()""", verbose=9)
delcount['mail_cookie'] = mail_cookie_gc()
## 5b - delete expired not confirmed email address
write_message("""DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')""" % CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS, verbose=9)
delcount['email_addresses'] = run_sql("""DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')""", (CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS,))
# 6 - delete expired roles memberships
write_message("""DELETE FROM user_accROLE WHERE expiration