diff --git a/.gitignore b/.gitignore index 7d7f3e181..ced0edc28 100644 --- a/.gitignore +++ b/.gitignore @@ -1,98 +1,99 @@ .version Makefile Makefile.in compile configure config.cache config.log config.status config.nice config.guess config.sub install-sh missing autom4te.cache aclocal.m4 TAGS invenio-autotools.conf po/POTFILES po/POTFILES-py po/POTFILES-webdoc po/stamp-po po/*.gmo po/*.mo po/*.sed *~ *.pyc *.clisp.mem *.cmucl.core *.sbcl.core *.fas *.fasl *.sse2f *.lib *.x86f modules/webauthorprofile/bin/webauthorprofile modules/bibauthorid/bin/bibauthorid modules/bibcirculation/bin/bibcircd modules/bibclassify/bin/bibclassify modules/bibconvert/bin/bibconvert modules/bibdocfile/bin/bibdocfile modules/bibedit/bin/bibedit modules/bibrecord/bin/textmarc2xmlmarc modules/bibrecord/bin/xmlmarc2textmarc modules/bibrecord/bin/xmlmarclint modules/docextract/bin/refextract modules/docextract/bin/docextract modules/bibencode/bin/bibencode modules/bibexport/bin/bibexport modules/bibformat/bin/bibreformat modules/oaiharvest/bin/oaiharvest modules/oairepository/bin/oairepositoryupdater modules/bibindex/bin/bibindex modules/bibindex/bin/bibstat modules/bibmatch/bin/bibmatch modules/bibrank/bin/bibrank modules/bibrank/bin/bibrankgkb modules/bibrank/etc/bibrankgkb.cfg modules/bibrank/etc/demo_jif.cfg modules/bibrank/etc/template_single_tag_rank_method.cfg modules/bibsched/bin/bibsched modules/bibsched/bin/bibtaskex modules/bibsched/bin/bibtasklet modules/bibsort/bin/bibsort modules/bibsword/bin/bibsword modules/bibupload/bin/batchuploader modules/bibupload/bin/bibupload modules/elmsubmit/bin/elmsubmit modules/elmsubmit/etc/elmsubmit.cfg modules/miscutil/bin/dbdump modules/miscutil/bin/dbexec modules/miscutil/bin/inveniocfg modules/miscutil/bin/inveniomanage modules/miscutil/bin/plotextractor modules/miscutil/etc/bash_completion.d/inveniocfg modules/miscutil/lib/build modules/webaccess/bin/authaction modules/webaccess/bin/webaccessadmin modules/webalert/bin/alertengine modules/webmessage/bin/webmessageadmin modules/websearch/bin/webcoll modules/websession/bin/inveniogc modules/webstat/bin/webstat modules/webstat/bin/webstatadmin modules/webstyle/bin/gotoadmin modules/webstyle/bin/webdoc modules/websubmit/bin/bibdocfile modules/websubmit/bin/inveniounoconv +modules/websubmit/bin/websubmitadmin modules/bibcirculation/bin/bibcircd tags config.status.lineno configure.lineno *.kdevelop *.kdevses .project .noseids .settings .pydevproject org.eclipse.core.resources.prefs diff --git a/AUTHORS b/AUTHORS index 9efae2f01..fe3125b4f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,383 +1,387 @@ Invenio AUTHORS =============== Invenio is being co-developed by an international collaboration comprising institutes such as CERN, CfA, DESY, EPFL, FNAL, SLAC. The CERN development team currently consists of: - Jean-Yves Le Meur CERN Digital Library Services team leader. - Tibor Simko CERN Digital Library Technology team leader. Head Developer of Invenio. General system architecture, release management. WebSearch, BibIndex, BibSched, WebStat, WebStyle, WebSession, WebHelp, and more. - Jerome Caffaro BibFormat, redesign and rewrite in Python. BibConvert XML-oriented mode. OAI harvester improvements. Improvements to BibEdit. WebDoc tool. WebJournal refactoring and rewrite. WebComment rounds and threads. WebSubmit asynchronous upload support. Improvements to numerous modules. - Samuele Kaplun Authentication and user preferences rewrite and improvements. Firewall-like access control RBAC system. 
Fulltext file management rewrite and upload feature. Intbitset Python C extension for the indexer. Improvements to the task scheduler and session handler. Improvements to numerous modules. - Ludmila Marian Citerank family of ranking methods. Fixes to numerous modules. - - Chris Montarbaud - Multimedia and photo management. - - Jaime Garcia Llopis Improvements to the BibCirculation module. - Flavio Costa Contributions to the Italian translation. - Jiri Kuncar - Contributions to the Czech translation. + Fix for demo site installation. Contributions to the Czech + translation. - Esteban J. G. Gabancho Initial release of WebApiKey, enhancements for WebSubmit. - Lars Holm Nielsen Initial release of Invenio Upgrader and jsonutils; patches for pluginutils, - Patrick Glauner Cleanup of SQL queries for several modules. - Raquel Jimenez Encinar Errorlib refactoring, improvements to search UI, discussion tab, merged record redirect, adaptation to new web test framework. - - Vasanth Venkatraman - Improvements to BibUpload version treatment, monotask and - sequence tasks for BibSched. + - Grzegorz Szpura + Better browsing of fuzzy indexes. - Thorsten Schwander Improvements to dbdump. - Jan Aage Lavik Improvements to BibMatch with remote matching capabilities, improvements to plot extractor, improvements to harvesting and other small fixes. - Piotr Praczyk OAI harvesting from arXiv. Test harvesting interface, for OAIHarvest. Record comparison library functions, for BibRecord. Numerous improvements to BibEdit, e.g. holding pen, copy/paste, undo/redo. - Samuele Carli Initial implementation of BibAuthorID module, with Henning Weiler. Fixes for basket export facility and adding external items to baskets. - Alessio Deiana - Improvements to data cacher and cite summary. + Fix for BibFormat element initialisation. Improvements to data + cacher and cite summary. - - Daniel Stanculescu - Improvements to Unicode treatment for textutils. + - Wojciech Ziolek + Fixes for OAI holding pen facility, Strftime improvements for + dateutils. - Sebastian Witowski Improvements to multi-record editor. - Laura Rueda Mechanize compatibility for Invenio Connector. - Annette Holtkamp Updates to `Howto MARC' guide. - Jocelyne Jerdelet Updates to `Howto MARC' guide. The EPFL development team currently consists of: - Gregory Favre Rewrite of WebBasket. WebMessage. Improvements to WebComment. Other contributions and improvements. The SLAC development team currently consists of: - Mike Sullivan Improvements to author pages. - Eduardo Benavidez Improvements to BibCatalog. The Harvard-Smithsonian Center for Astrophysics development team currently consists of: - Alberto Accomazzi Team leader. - Giovanni Di Milia Recognition of /record/sysno URLs, ADS formatting. - Jay Luker Improvements to the emergency notification sending facility. - Roman Chyla WSGI handler accepts UTF-8 strings. -The Cornell development team currently consists of: +Many former team members (mostly CERN staff and fellows, technical +students, diploma students, summer students) contributed to the +project since 2002. In an approximately decreasing chronological +order: - - Simeon Warner - Team leader. + - Daniel Stanculescu + Improvements to Unicode treatment for textutils. + + - Vasanth Venkatraman + Improvements to BibUpload version treatment, monotask and + sequence tasks for BibSched. - Peter Halliday Configurable /record URL name space, improvements to dbquery.
-Many former team members (mostly CERN staff and fellows, technical -students, diploma students, summer students) contributed to the -project since 2002. In an approximately decreasing chronological -order: + - Chris Montarbaud + Multimedia and photo management. - Joe Blaylock Rewrite of SPIRES query syntax parser, support for nested parentheses for WebSearch, fuzzy author name tokenizer, enrichment of author pages with h-index. - Benoit Thiell Fixes for BibRecord library, detailed record links, improvements to code kwalitee in numerous modules. Improvements to BibClassify. - Nikola Yolov Improvements and refactoring of BibAuthorID, fixes for WebAuthorProfile. - Lewis Barnes Amendments for INSPIRE linking style. - Olivier Canévet Fixes for WebComment report abuse facility. - Belinda Chan User documentation for personal features like alerts and baskets. - Carmen Alvarez Perez Improvements to WebStat. - Henning Weiler Initial implementation of BibAuthorID module, with Samuele Carli. - Juan Francisco Pereira Corral Fix taxonomy regression test, for BibKnowledge. - Stamen Todorov Peev Enrichment of Dublin Core XSL stylesheet. - Jan Iwaszkiewicz Full-text snippet feature for full-text search. - Björn Oltmanns Initial release of BibEncode, multi-node support for BibSched, style refactoring for WebComment. - Christopher Dickinson Patch for auto-suggest facility. - Christopher Hayward Improvements to the reference extraction tool. - Travis Brooks Support for SPIRES search syntax and other improvements. - Juliusz Sompolski Reimplementation of pdf2hocr2pdf. - Jurga Girdzijauskaite Contributions to the Lithuanian translation. - Tony Ohls Fixes for regexp treatment in BibConvert. - Marko Niinimaki Contributions to the BibRank citation module and WebSearch summary output formats. Initial implementation of BibCatalog and BibKnowledge. - Mathieu Barras Initial implementation of SWORD client application. - Fabio Souto Initial implementation of the invenio config dumper/loader. - Pablo Vázquez Caderno Prevent loops in collection trees, for WebSearch. - Victor Engmark Bash completion for inveniocfg, patches for dist-packages. - Javier Martin Moderation tools for WebComment, improvements to BibEdit, initial implementation of the batch uploader. - Nikolaos Kasioumis Hosted collections for WebSearch, rewrite of WebBasket UI, improvements to WebAlert. - Valkyrie Savage Initial implementation of the plot extractor library. - Miguel Martinez Pedreira Tool for manipulating embedded metadata in full-text files. - Jorge Aranda Sumarroca Support for FCKeditor-uploaded files for WebSubmit. - Glenn Gard Implemented many unit, regression and web tests for WebAlert, WebJournal, WebSubmit, WebComment, WebMessage, WebSession modules. - Christopher Parker Improvements to the submission approval workflow. - Martin Vesely OAIHarvest, OAIRepository, OAI daemon and admin interface. BibConvert text-oriented mode. BibMatch. - Tony Osborne Improvements to the reference extractor. - Radoslav Ivanov Contributions to the WebBasket module test suite. Support for parentheses and SPIRES search syntax in WebSearch. Initial implementation of the multi-record editor. Initial implementation of BibExport. - Joaquim Rodrigues Silvestre Initial implementation of the BibCirculation module to handle physical item copies. - Kyriakos Liakopoulos Initial implementation of BibMerge. Improvements to BibEdit. - Lars Christian Raae Record locking, per-collection curating authentication, reverting older record versions, for the BibEdit.
Rewrite of BibEdit in Ajax. - Ruben Pollan Contributions to the WebStat module. - Nicholas Robinson WebSubmit. Reference extraction for the BibEdit module. - Gabriel Hase WebJournal module. - Diane Berkovits Ranking by downloads, for the BibRank and WebSubmit modules. Group management for WebSession. - Joël Vogt Contributions to the BibClassify module. - Marcus Johansson Contributions to the WebStat module. - Jan Brice Krause Original implementation of the fulltext file transfer mode for BibUpload. - Axel Voitier Complex approval and refereeing subsystem, for WebSubmit. - Alberto Pepe BibClassify, OAIHarvest Admin. - Øyvind Østlund Sample BibTeX to MARCXML conversion, for BibConvert. - Nikolay Dyankov XML-based BFX formatting engine, for BibFormat. - Olivier Serres External collections searching, for WebSearch. - Eric Stahl Rewrite of BibUpload in Python. - Frederic Gobry Contributions to the templating system, the URL handler, the gettext infrastructure, the regression test suite infrastructure, numerous patches for many modules. - Krzysztof Jedrzejek Improvements to ElmSubmit. - Yohann Paris BibEdit Admin. - Paulo Cabral WebComment, error library, design of collaborative features. - Thomas Baron WebSubmit and BibUpload. Improvements to BibSched. - Maja Gracco System librarian, MARC21 expertise. - Tiberiu Dondera Patches for the WebSubmit engine and the admin interface. Templatizing codebase. - Anna Afshar Ranking by citations, for the BibRank module. - Trond Aksel Myklebust Ranking engine, the BibRank module. Stemming and stopwords for the BibIndex module. Site access policies and external authentication methods, for the WebAccess module and its clients. Administration interfaces to WebSearch, BibIndex, BibRank, and additions to WebAccess. - Hector Sanchez Metadata output formatter, the BibFormat module. Session management, for the WebSession module. - Richard Owen Electronic mail submission system, the ElmSubmit module. - Alexandra Silva Rewriting and enhancing BibRecord XML MARC and record handling library, for the BibEdit module. - Arturo Montejo Raez Automatic text classification and keyword indexing. (upcoming) - Mikael Vik Role-based access control engine and its admin interface, the WebAccess module. Guest user sessions garbage collector, for the WebSession module. - Erik Simon , Eric Simon Alert engine, for the WebAlert module. - Roberta Faggian Rewrite of the alert and basket user interfaces, for the WebAlert and the WebBasket modules. - Julio Pernia Aznar Parts of user and session management, for the WebSession module. - Franck Grenier Parts of web design and graphics, for the WebStyle module. - Eduardo Margallo Enhancements to the indexing engine, for the BibWords module. Initial implementation of the task scheduler, for the BibSched module. - end of file - diff --git a/INSTALL b/INSTALL index 61ce5933d..3bc0c54db 100644 --- a/INSTALL +++ b/INSTALL @@ -1,909 +1,909 @@ Invenio INSTALLATION ==================== About ===== This document specifies how to build, customize, and install Invenio -v1.1.1 for the first time. See RELEASE-NOTES if you are upgrading +v1.1.2 for the first time. See RELEASE-NOTES if you are upgrading from a previous Invenio release. Contents ======== 0. Prerequisites 1. Quick instructions for the impatient Invenio admin 2. Detailed instructions for the patient Invenio admin 0. Prerequisites ================ Here is the software you need to have around before you start installing Invenio: a) Unix-like operating system. 
The main development and production platforms for Invenio at CERN are GNU/Linux distributions Debian, Gentoo, Scientific Linux (aka RHEL), Ubuntu, but we also develop on Mac OS X. Basically any Unix system supporting the software listed below should do. If you are using Debian GNU/Linux ``Lenny'' or later, then you can install most of the below-mentioned prerequisites and recommendations by running: $ sudo aptitude install python-dev apache2-mpm-prefork \ mysql-server mysql-client python-mysqldb \ python-4suite-xml python-simplejson python-xml \ python-libxml2 python-libxslt1 gnuplot poppler-utils \ gs-common clisp gettext libapache2-mod-wsgi unzip \ pdftk html2text giflib-tools \ pstotext netpbm python-chardet You also need to install the following packages from PyPI by running: $ sudo pip install -r requirements.txt $ sudo pip install -r requirements-extras.txt $ sudo pip install -r requirements-flask.txt $ sudo pip install -r requirements-flask-ext.txt You may also want to install some of the following packages, if you have them available on your concrete architecture: $ sudo aptitude install sbcl cmucl pylint pychecker pyflakes \ python-profiler python-epydoc libapache2-mod-xsendfile \ openoffice.org python-utidylib python-beautifulsoup Moreover, you should install some Message Transfer Agent (MTA) such as Postfix so that Invenio can email notification alerts or registration information to the end users, contact moderators and reviewers of submitted documents, inform administrators about various runtime system information, etc: $ sudo aptitude install postfix After running the above-quoted aptitude command(s), you can proceed to configuring your MySQL server instance (max_allowed_packet in my.cnf, see item 0b below) and then to installing the Invenio software package in the section 1 below. If you are using another operating system, then please continue reading the rest of this prerequisites section, and please consult our wiki pages for any concrete hints for your specific operating system. b) MySQL server (may be on a remote machine), and MySQL client (must be available locally too). MySQL versions 4.1 or 5.0 are supported. Please set the variable "max_allowed_packet" in your "my.cnf" init file to at least 4M. (For sites such as INSPIRE, having 1M records with 10M citer-citee pairs in its citation map, you may need to increase max_allowed_packet to 1G.) You may perhaps also want to run your MySQL server natively in UTF-8 mode by setting "default-character-set=utf8" in various parts of your "my.cnf" file, such as in the "[mysql]" part and elsewhere; but this is not really required. c) Apache 2 server, with support for loading DSO modules, and optionally with SSL support for HTTPS-secure user authentication, and mod_xsendfile for off-loading file downloads away from Invenio processes to Apache. d) Python v2.4 or above: as well as the following Python modules: - (mandatory) MySQLdb (version >= 1.2.1_p2; see below) - (mandatory) Pyparsing, for document parsing - (recommended) python-dateutil, for complex date processing: - (recommended) PyXML, for XML processing: - (recommended) PyRXP, for very fast XML MARC processing: - (recommended) lxml, for XML/XSLT processing: - (recommended) libxml2-python, for XML/XSLT processing: - (recommended) simplejson, for AJAX apps: Note that if you are using Python-2.6, you don't need to install simplejson, because the module is already included in the main Python distribution.
- (recommended) Gnuplot.Py, for producing graphs: - (recommended) Snowball Stemmer, for stemming: - (recommended) py-editdist, for record merging: - (recommended) numpy, for citerank methods: - (recommended) magic, for full-text file handling: - (optional) chardet, for character encoding detection: - (optional) 4suite, slower alternative to PyRXP and libxml2-python: - (optional) feedparser, for web journal creation: - (optional) RDFLib, to use RDF ontologies and thesauri: - (optional) mechanize, to run regression web test suite: - (optional) python-mock, mocking library for the test suite: - (optional) hashlib, needed only for Python-2.4 and only if you would like to use AWS connectivity: - (optional) utidylib, for HTML washing: - (optional) Beautiful Soup, for HTML washing: - (optional) Python Twitter (and its dependencies) if you want to use the Twitter Fetcher bibtasklet: - (optional) Python OpenID if you want to enable OpenID support for authentication: - (optional) Python Rauth if you want to enable OAuth 1.0/2.0 support for authentication (depends on Python-2.6 or later): Note: MySQLdb version 1.2.1_p2 or higher is recommended. If you are using an older version of MySQLdb, you may get into problems with character encoding. e) mod_wsgi Apache module. Versions 3.x and above are recommended. Note: if you are using Python 2.4 or earlier, then you should also install the wsgiref Python module, available from: (As of Python 2.5 this module is included in standard Python distribution.) f) If you want to be able to extract references from PDF fulltext files, then you need to install pdftotext version 3 at least. g) If you want to be able to search for words in the fulltext files (i.e. to have fulltext indexing) or to stamp submitted files, then you need as well to install some of the following tools: - for Microsoft Office/OpenOffice.org document conversion: OpenOffice.org - for PDF file stamping: pdftk, pdf2ps - for PDF files: pdftotext or pstotext - for PostScript files: pstotext or ps2ascii - for DjVu creation, elaboration: DjVuLibre - to perform OCR: OCRopus (tested only with release 0.3.1) - to perform different image elaborations: ImageMagick - to generate PDF after OCR: netpbm, ReportLab and pyPdf or pyPdf2 h) If you have chosen to install fast XML MARC Python processors in the step d) above, then you have to install the parsers themselves: - (optional) 4suite: i) (recommended) Gnuplot, the command-line driven interactive plotting program. It is used to display download and citation history graphs on the Detailed record pages on the web interface. Note that Gnuplot must be compiled with PNG output support, that is, with the GD library. Note also that Gnuplot is not required, only recommended. j) (recommended) A Common Lisp implementation, such as CLISP, SBCL or CMUCL. It is used for the web server log analysing tool and the metadata checking program. Note that any of the three implementations CLISP, SBCL, or CMUCL will do. CMUCL produces fastest machine code, but it does not support UTF-8 yet. Pick up CLISP if you don't know what to do. Note that a Common Lisp implementation is not required, only recommended. k) GNU gettext, a set of tools that makes it possible to translate the application in multiple languages. This is available by default on many systems. 
l) (recommended) xlwt 0.7.2, Library to create spreadsheet files compatible with MS Excel 97/2000/XP/2003 XLS files, on any platform, with Python 2.3 to 2.6 m) (recommended) matplotlib 1.0.0 is a python 2D plotting library which produces publication quality figures in a variety of hardcopy formats and interactive environments across platforms. matplotlib can be used in python scripts, the python and ipython shell (ala MATLAB® or Mathematica®), web application servers, and six graphical user interface toolkits. It is used to generate pie graphs in the custom summary query (WebStat) n) (optional) FFmpeg, an open-source collection of tools and libraries to convert video and audio files. It makes use of both internal as well as external libraries to generate videos for the web, such as Theora, WebM and H.264 out of almost any thinkable video input. FFmpeg is needed to run video related modules and submission workflows in Invenio. The minimal configuration of ffmpeg for the Invenio demo site requires a number of external libraries. It is highly recommended to remove all installed versions and packages that are coming with various Linux distributions and install the latest versions from sources. Additionally, you will need the Mediainfo Library for multimedia metadata handling. Minimum libraries for the demo site: - the ffmpeg multimedia encoder tools - a library for jpeg images needed for thumbnail extraction - a library for the ogg container format, needed for Vorbis and Theora - the OGG Vorbis audio codec library - the OGG Theora video codec library - the WebM video codec library - the mediainfo library for multimedia metadata Recommended for H.264 video (!be aware of licensing issues!): - a library for H.264 video encoding - a library for Advanced Audio Coding - a library for MP3 encoding o) (recommended) RabbitMQ is a message broker used by Celery for running a distributed task queue. - Install sudo aptitude install rabbitmq-server - Enable web interface sudo rabbitmq-plugins enable rabbitmq_management - Add user and vhost sudo rabbitmqctl add_user myuser mypassword sudo rabbitmqctl add_vhost myvhost sudo rabbitmqctl set_permissions -p myvhost myuser ".*" ".*" ".*" - Allow Web UI login sudo rabbitmqctl set_user_tags myuser management - Change default user password sudo rabbitmqctl change_password guest guest sudo service rabbitmq-server restart - Starting Celery worker (after Invenio is installed): celery worker -A invenio -l info -B -E - Starting Flower (monitoring web interface, requires Python 2.6): pip install flower flower --port=5555 http://localhost:55672 (RabbitMQ web admin) http://localhost:5555 (Flower UI) 1. Quick instructions for the impatient Invenio admin ========================================================= 1a.
Installation ---------------- $ cd $HOME/src/ - $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz - $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.md5 - $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.sig - $ md5sum -c invenio-1.1.1.tar.gz.md5 - $ gpg --verify invenio-1.1.1.tar.gz.sig invenio-1.1.1.tar.gz - $ tar xvfz invenio-1.1.1.tar.gz - $ cd invenio-1.1.1 + $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz + $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.md5 + $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.sig + $ md5sum -c invenio-1.1.2.tar.gz.md5 + $ gpg --verify invenio-1.1.2.tar.gz.sig invenio-1.1.2.tar.gz + $ tar xvfz invenio-1.1.2.tar.gz + $ cd invenio-1.1.2 $ ./configure $ make $ make install $ make install-bootstrap $ make install-hogan-plugin $ make install-mathjax-plugin ## optional $ make install-jquery-plugins ## optional $ make install-jquery-tokeninput ## optional $ make install-plupload-plugin ## optional $ make install-ckeditor-plugin ## optional $ make install-pdfa-helper-files ## optional $ make install-mediaelement ## optional $ make install-solrutils ## optional $ make install-js-test-driver ## optional 1b. Configuration ----------------- $ sudo chown -R www-data.www-data /opt/invenio $ sudo -u www-data emacs /opt/invenio/etc/invenio-local.conf $ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-secret-key $ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-tables $ sudo -u www-data /opt/invenio/bin/inveniocfg --load-bibfield-conf $ sudo -u www-data /opt/invenio/bin/inveniocfg --load-webstat-conf $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-apache-conf $ sudo /etc/init.d/apache2 restart $ sudo -u www-data /opt/invenio/bin/inveniocfg --check-openoffice $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-demo-site $ sudo -u www-data /opt/invenio/bin/inveniocfg --load-demo-records $ sudo -u www-data /opt/invenio/bin/inveniocfg --run-unit-tests $ sudo -u www-data /opt/invenio/bin/inveniocfg --run-regression-tests $ sudo -u www-data /opt/invenio/bin/inveniocfg --run-web-tests $ sudo -u www-data /opt/invenio/bin/inveniocfg --remove-demo-records $ sudo -u www-data /opt/invenio/bin/inveniocfg --drop-demo-site $ firefox http://your.site.com/help/admin/howto-run 2. Detailed instructions for the patient Invenio admin ========================================================== 2a. Installation ---------------- The Invenio uses standard GNU autoconf method to build and install its files. This means that you proceed as follows: $ cd $HOME/src/ Change to a directory where we will build the Invenio sources. (The built files will be installed into different "target" directories later.) - $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz - $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.md5 - $ wget http://invenio-software.org/download/invenio-1.1.1.tar.gz.sig + $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz + $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.md5 + $ wget http://invenio-software.org/download/invenio-1.1.2.tar.gz.sig Fetch Invenio source tarball from the distribution server, together with MD5 checksum and GnuPG cryptographic signature files useful for verifying the integrity of the tarball. 
- $ md5sum -c invenio-1.1.1.tar.gz.md5 + $ md5sum -c invenio-1.1.2.tar.gz.md5 Verify MD5 checksum. - $ gpg --verify invenio-1.1.1.tar.gz.sig invenio-1.1.1.tar.gz + $ gpg --verify invenio-1.1.2.tar.gz.sig invenio-1.1.2.tar.gz Verify GnuPG cryptographic signature. Note that you may first have to import my public key into your keyring, if you haven't done that already: $ gpg --keyserver wwwkeys.eu.pgp.net --recv-keys 0xBA5A2B67 The output of the gpg --verify command should then read: Good signature from "Tibor Simko " You can safely ignore any trusted signature certification warning that may follow after the signature has been successfully verified. - $ tar xvfz invenio-1.1.1.tar.gz + $ tar xvfz invenio-1.1.2.tar.gz Untar the distribution tarball. - $ cd invenio-1.1.1 + $ cd invenio-1.1.2 Go to the source directory. $ ./configure Configure Invenio software for building on this specific platform. You can use the following optional parameters: --prefix=/opt/invenio Optionally, specify the Invenio general installation directory (default is /opt/invenio). It will contain command-line binaries and program libraries containing the core Invenio functionality, but also store web pages, runtime log and cache information, document data files, etc. Several subdirs like `bin', `etc', `lib', or `var' will be created inside the prefix directory to this effect. Note that the prefix directory should be chosen outside of the Apache htdocs tree, since only one its subdirectory (prefix/var/www) is to be accessible directly via the Web (see below). Note that Invenio won't install to any other directory but to the prefix mentioned in this configuration line. --with-python=/opt/python/bin/python2.4 Optionally, specify a path to some specific Python binary. This is useful if you have more than one Python installation on your system. If you don't set this option, then the first Python that will be found in your PATH will be chosen for running Invenio. --with-mysql=/opt/mysql/bin/mysql Optionally, specify a path to some specific MySQL client binary. This is useful if you have more than one MySQL installation on your system. If you don't set this option, then the first MySQL client executable that will be found in your PATH will be chosen for running Invenio. --with-clisp=/opt/clisp/bin/clisp Optionally, specify a path to CLISP executable. This is useful if you have more than one CLISP installation on your system. If you don't set this option, then the first executable that will be found in your PATH will be chosen for running Invenio. --with-cmucl=/opt/cmucl/bin/lisp Optionally, specify a path to CMUCL executable. This is useful if you have more than one CMUCL installation on your system. If you don't set this option, then the first executable that will be found in your PATH will be chosen for running Invenio. --with-sbcl=/opt/sbcl/bin/sbcl Optionally, specify a path to SBCL executable. This is useful if you have more than one SBCL installation on your system. If you don't set this option, then the first executable that will be found in your PATH will be chosen for running Invenio. --with-openoffice-python Optionally, specify the path to the Python interpreter embedded with OpenOffice.org. This is normally not contained in the normal path. If you don't specify this it won't be possible to use OpenOffice.org to convert from and to Microsoft Office and OpenOffice.org documents. This configuration step is mandatory. Usually, you do this step only once. 
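For illustration only, a typical invocation that combines several of the options above might look like this (the paths below are merely examples and depend on where Python and MySQL live on your system):

      $ ./configure --prefix=/opt/invenio \
                    --with-python=/usr/bin/python \
                    --with-mysql=/usr/bin/mysql

After that you would continue with "make" and "make install" as described below.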
(Note that if you are building Invenio not from a released tarball, but from the Git sources, then you have to generate the configure file via autotools: $ sudo aptitude install automake1.9 autoconf $ aclocal-1.9 $ automake-1.9 -a $ autoconf after which you proceed with the usual configure command.) $ make Launch the Invenio build. Since many messages are printed during the build process, you may want to run it in a fast-scrolling terminal such as rxvt or in a detached screen session. During this step all the pages and scripts will be pre-created and customized based on the config you have edited in the previous step. Note that on systems such as FreeBSD or Mac OS X you have to use GNU make ("gmake") instead of "make". $ make install Install the web pages, scripts, utilities and everything needed for Invenio runtime into respective installation directories, as specified earlier by the configure command. Note that if you are installing Invenio for the first time, you will be asked to create symbolic link(s) from Python's site-packages system-wide directory(ies) to the installation location. This is in order to instruct Python where to find Invenio's Python files. You will be hinted as to the exact command to use based on the parameters you have used in the configure command. $ make install-bootstrap This will automatically download and install Twitter Bootstrap prerequisite. $ make install-mathjax-plugin ## optional This will automatically download and install in the proper place MathJax, a JavaScript library to render LaTeX formulas in the client browser. Note that in order to enable the rendering you will have to set the variable CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS in invenio-local.conf to a suitable list of output format codes. For example: CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS = hd,hb $ make install-jquery-plugins ## optional This will automatically download and install in the proper place jQuery and related plugins. They are used for AJAX applications such as the record editor. Note that `unzip' is needed when installing jquery plugins. $ make install-jquery-tokeninput ## optional This will automatically download and install jQuery Tokeninput pre-requisite. $ make install-plupload-plugin ## optional This will automatically download and install plupload pre-requisite that is used in the deposition interface for submitting files. $ make install-ckeditor-plugin ## optional This will automatically download and install in the proper place CKeditor, a WYSIWYG Javascript-based editor (e.g. for the WebComment module). Note that in order to enable the editor you have to set the CFG_WEBCOMMENT_USE_RICH_EDITOR to True. $ make install-pdfa-helper-files ## optional This will automatically download and install in the proper place the helper files needed to create PDF/A files out of existing PDF files. $ make install-mediaelement ## optional This will automatically download and install the MediaElementJS HTML5 video player that is needed for videos on the DEMO site. $ make install-solrutils ## optional This will automatically download and install a Solr instance which can be used for full-text searching. See CFG_SOLR_URL variable in the invenio.conf. Note that the admin later has to take care of running init.d scripts which would start the Solr instance automatically. $ make install-js-test-driver ## optional This will automatically download and install JsTestDriver which is needed to run JS unit tests. Recommended for developers. 2b. 
Configuration ----------------- Once the basic software installation is done, we proceed to configuring your Invenio system. $ sudo chown -R www-data.www-data /opt/invenio For the sake of simplicity, let us assume that your Invenio installation will run under the `www-data' user process identity. The above command changes ownership of installed files to www-data, so that we shall run everything under this user identity from now on. For production purposes, you would typically enable Apache server to read all files from the installation place but to write only to the `var' subdirectory of your installation place. You could achieve this by configuring Unix directory group permissions, for example. $ sudo -u www-data emacs /opt/invenio/etc/invenio-local.conf Customize your Invenio installation. Please read the 'invenio.conf' file located in the same directory that contains the vanilla default configuration parameters of your Invenio installation. If you want to customize some of these parameters, you should create a file named 'invenio-local.conf' in the same directory where 'invenio.conf' lives and you should write there only the customizations that you want to be different from the vanilla defaults. Here is a realistic, minimalist, yet production-ready example of what you would typically put there: $ cat /opt/invenio/etc/invenio-local.conf [Invenio] CFG_SITE_NAME = John Doe's Document Server CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe CFG_SITE_URL = http://your.site.com CFG_SITE_SECURE_URL = https://your.site.com CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com CFG_DATABASE_HOST = localhost CFG_DATABASE_NAME = invenio CFG_DATABASE_USER = invenio CFG_DATABASE_PASS = my123p$ss CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE = 1 You should override at least the parameters mentioned above in order to define some very essential runtime parameters such as the name of your document server (CFG_SITE_NAME and CFG_SITE_NAME_INTL_*), the visible URL of your document server (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email address of the local Invenio administrator, comment moderator, and alert engine (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_ADMIN_EMAIL, etc), and last but not least your database credentials (CFG_DATABASE_*). If this is a first installation of Invenio it is recommended you set the CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE variable to 1. If this is instead an upgrade from an existing installation don't add it until you have run: $ bibdocfile --fix-bibdocfsinfo-cache . The Invenio system will then read both the default invenio.conf file and your customized invenio-local.conf file and it will override any default options with the ones you have specified in your local file. This cascading of configuration parameters will ease your future upgrades. If you want to have multiple Invenio instances for distributed video encoding, you need to share the same configuration among them and make some of the folders of the Invenio installation available for all nodes.
Configure the allowed tasks for every node: CFG_BIBSCHED_NODE_TASKS = { "hostname_machine1" : ["bibindex", "bibupload", "bibreformat","webcoll", "bibtaskex", "bibrank", "oaiharvest", "oairepositoryupdater", "inveniogc", "webstatadmin", "bibclassify", "bibexport", "dbdump", "batchuploader", "bibauthorid", "bibtasklet"], "hostname_machine2" : ['bibencode',] } Share the following directories among Invenio instances: /var/tmp-shared hosts video uploads in a temporary form /var/tmp-shared/bibencode/jobs hosts new job files for the video encoding daemon /var/tmp-shared/bibencode/jobs/done hosts job files that have been processed by the daemon /var/data/files hosts fulltext and media files associated to records /var/data/submit hosts files created during submissions $ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all Make the rest of the Invenio system aware of your invenio-local.conf changes. This step is mandatory each time you edit your conf files. $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-secret-key You may need to create secret key for the Flask application if you have not done so yet during customisation of your `invenio-local.conf'. This command will check the contents of this file and will update it with randomly generated secret key value. $ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all Make the rest of the Invenio system aware of the secret key change in invenio-local.conf. $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-tables If you are installing Invenio for the first time, you have to create database tables. Note that this step checks for potential problems such as the database connection rights and may ask you to perform some more administrative steps in case it detects a problem. Notably, it may ask you to set up database access permissions, based on your configure values. If you are installing Invenio for the first time, you have to create a dedicated database on your MySQL server that the Invenio can use for its purposes. Please contact your MySQL administrator and ask him to execute the commands this step proposes you. At this point you should now have successfully completed the "make install" process. We continue by setting up the Apache web server. $ sudo -u www-data /opt/invenio/bin/inveniocfg --load-bibfield-conf Load the configuration file of the BibField module. It will create `bibfield_config.py' file. (FIXME: When BibField becomes essential part of Invenio, this step should be later automatised so that people do not have to run it manually.) $ sudo -u www-data /opt/invenio/bin/inveniocfg --load-webstat-conf Load the configuration file of webstat module. It will create the tables in the database for register customevents, such as basket hits. $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-apache-conf Running this command will generate Apache virtual host configurations matching your installation. You will be instructed to check created files (usually they are located under /opt/invenio/etc/apache/) and edit your httpd.conf to activate Invenio virtual hosts. 
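For example, before activating anything you may want to have a quick look at the generated snippets (the exact file names can vary, but they are typically the two vhost files referenced in the commands below):

      $ ls /opt/invenio/etc/apache/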
If you are using Debian GNU/Linux ``Lenny'' or later, then you can do the following to create your SSL certificate and to activate your Invenio vhosts: ## make SSL certificate: $ sudo aptitude install ssl-cert $ sudo mkdir /etc/apache2/ssl $ sudo /usr/sbin/make-ssl-cert /usr/share/ssl-cert/ssleay.cnf \ /etc/apache2/ssl/apache.pem ## add Invenio web sites: $ sudo ln -s /opt/invenio/etc/apache/invenio-apache-vhost.conf \ /etc/apache2/sites-available/invenio $ sudo ln -s /opt/invenio/etc/apache/invenio-apache-vhost-ssl.conf \ /etc/apache2/sites-available/invenio-ssl ## disable Debian's default web site: $ sudo /usr/sbin/a2dissite default ## enable Invenio web sites: $ sudo /usr/sbin/a2ensite invenio $ sudo /usr/sbin/a2ensite invenio-ssl ## enable SSL module: $ sudo /usr/sbin/a2enmod ssl ## if you are using xsendfile module, enable it too: $ sudo /usr/sbin/a2enmod xsendfile If you are using another operating system, you should do the equivalent, for example edit your system-wide httpd.conf and put the following include statements: Include /opt/invenio/etc/apache/invenio-apache-vhost.conf Include /opt/invenio/etc/apache/invenio-apache-vhost-ssl.conf Note that you may need to adapt generated vhost file snippets to match your concrete operating system specifics. For example, the generated configuration snippet will preload Invenio WSGI daemon application upon Apache start up for faster site response. The generated configuration assumes that you are using mod_wsgi version 3 or later. If you are using the old legacy mod_wsgi version 2, then you would need to comment out the WSGIImportScript directive from the generated snippet, or else move the WSGI daemon setup to the top level, outside of the VirtualHost section. Note also that you may want to tweak the generated Apache vhost snippet for performance reasons, especially with respect to WSGIDaemonProcess parameters. For example, you can increase the number of processes from the default value `processes=5' if you have lots of RAM and if many concurrent users may access your site in parallel. However, note that you must use `threads=1' there, because Invenio WSGI daemon processes are not fully thread safe yet. This may change in the future. $ sudo /etc/init.d/apache2 restart Please ask your webserver administrator to restart the Apache server after the above "httpd.conf" changes. $ sudo -u www-data /opt/invenio/bin/inveniocfg --check-openoffice If you plan to support MS Office or Open Document Format files in your installation, you should check whether LibreOffice or OpenOffice.org is well integrated with Invenio by running the above command. You may be asked to create a temporary directory for converting office files with special ownership (typically as user nobody) and permissions. Note that you can do this step later. $ sudo -u www-data /opt/invenio/bin/inveniocfg --create-demo-site This step is recommended to test your local Invenio installation. It should give you our "Atlantis Institute of Science" demo installation, exactly as you see it at . $ sudo -u www-data /opt/invenio/bin/inveniocfg --load-demo-records Optionally, load some demo records to be able to test indexing and searching of your local Invenio demo installation. $ sudo -u www-data /opt/invenio/bin/inveniocfg --run-unit-tests Optionally, you can run the unit test suite to verify the unit behaviour of your local Invenio installation. Note that this command should be run only after you have installed the whole system via `make install'. 
$ sudo -u www-data /opt/invenio/bin/inveniocfg --run-regression-tests Optionally, you can run the full regression test suite to verify the functional behaviour of your local Invenio installation. Note that this command requires to have created the demo site and loaded the demo records. Note also that running the regression test suite may alter the database content with junk data, so that rebuilding the demo site is strongly recommended afterwards. $ sudo -u www-data /opt/invenio/bin/inveniocfg --run-web-tests Optionally, you can run additional automated web tests running in a real browser. This requires to have Firefox with the Selenium IDE extension installed. $ sudo -u www-data /opt/invenio/bin/inveniocfg --remove-demo-records Optionally, remove the demo records loaded in the previous step, but keeping otherwise the demo collection, submission, format, and other configurations that you may reuse and modify for your own production purposes. $ sudo -u www-data /opt/invenio/bin/inveniocfg --drop-demo-site Optionally, drop also all the demo configuration so that you'll end up with a completely blank Invenio system. However, you may want to find it more practical not to drop the demo site configuration but to start customizing from there. $ firefox http://your.site.com/help/admin/howto-run In order to start using your Invenio installation, you can start indexing, formatting and other daemons as indicated in the "HOWTO Run" guide on the above URL. You can also use the Admin Area web interfaces to perform further runtime configurations such as the definition of data collections, document types, document formats, word indexes, etc. $ sudo ln -s /opt/invenio/etc/bash_completion.d/inveniocfg \ /etc/bash_completion.d/inveniocfg Optionally, if you are using Bash shell completion, then you may want to create the above symlink in order to configure completion for the inveniocfg command. Good luck, and thanks for choosing Invenio. - Invenio Development Team diff --git a/Makefile.am b/Makefile.am index 47b6c4cc4..b57f6af54 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,580 +1,583 @@ ## This file is part of Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
confignicedir = $(sysconfdir)/build confignice_SCRIPTS=config.nice SUBDIRS = po config modules EXTRA_DIST = UNINSTALL THANKS RELEASE-NOTES configure-tests.py config.nice.in \ config.rpath # current MathJax version and packages # See also modules/miscutil/lib/htmlutils.py (get_mathjax_header) MJV = 2.1 MATHJAX = http://invenio-software.org/download/mathjax/MathJax-v$(MJV).zip # current CKeditor version CKV = 3.6.6 CKEDITOR = ckeditor_$(CKV).zip # current MediaElement.js version MEV = master MEDIAELEMENT = http://github.com/johndyer/mediaelement/zipball/$(MEV) #for solrutils INVENIO_JAVA_PATH = org/invenio_software/solr solrdirname = apache-solr-3.1.0 solrdir = $(prefix)/lib/$(solrdirname) solrutils_dir=$(CURDIR)/modules/miscutil/lib/solrutils CLASSPATH=.:${solrdir}/dist/solrj-lib/commons-io-1.4.jar:${solrdir}/dist/apache-solr-core-*jar:${solrdir}/contrib/jzlib-1.0.7.jar:${solrdir}/dist/apache-solr-solrj-3.1.0.jar:${solrdir}/dist/solrj-lib/slf4j-api-1.5.5.jar:${solrdir}/dist/*:${solrdir}/contrib/basic-lucene-libs/*:${solrdir}/contrib/analysis-extras/lucene-libs/*:${solrdir}/dist/solrj-lib/* # git-version-get stuff: BUILT_SOURCES = $(top_srcdir)/.version $(top_srcdir)/.version: echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: echo $(VERSION) > $(distdir)/.tarball-version # Bootstrap version BOOTSTRAPV = 2.2.1 # Hogan.js version HOGANVER = 2.0.0 check-upgrade: $(PYTHON) $(top_srcdir)/modules/miscutil/lib/inveniocfg_upgrader.py $(top_srcdir) --upgrade-check check-custom-templates: $(PYTHON) $(top_srcdir)/modules/webstyle/lib/template.py --check-custom-templates $(top_srcdir) kwalitee-check: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --stats $(top_srcdir) kwalitee-check-errors-only: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-errors $(top_srcdir) kwalitee-check-variables: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-variables $(top_srcdir) kwalitee-check-indentation: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-indentation $(top_srcdir) kwalitee-check-sql-queries: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-sql $(top_srcdir) etags: \rm -f $(top_srcdir)/TAGS (cd $(top_srcdir) && find $(top_srcdir) -name "*.py" -print | xargs etags) install-data-local: for d in / /cache /cache/RTdata /log /tmp /tmp-shared /data /run /tmp-shared/bibencode/jobs/done /tmp-shared/bibedit-cache; do \ mkdir -p $(localstatedir)$$d ; \ done @echo "************************************************************" @echo "** Invenio software has been successfully installed! **" @echo "** **" @echo "** You may proceed to customizing your installation now. **" @echo "************************************************************" install-mathjax-plugin: @echo "***********************************************************" @echo "** Installing MathJax plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-mathjax-plugin mkdir /tmp/invenio-mathjax-plugin rm -fr ${prefix}/var/www/MathJax mkdir -p ${prefix}/var/www/MathJax (cd /tmp/invenio-mathjax-plugin && \ wget '$(MATHJAX)' -O mathjax.zip && \ unzip -q mathjax.zip && cd mathjax-MathJax-* && cp -r * \ ${prefix}/var/www/MathJax) rm -fr /tmp/invenio-mathjax-plugin @echo "************************************************************" @echo "** The MathJax plugin was successfully installed. 
**" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and **" @echo "** CFG_WEBSUBMIT_USE_MATHJAX in invenio.conf. **" @echo "************************************************************" uninstall-mathjax-plugin: @rm -rvf ${prefix}/var/www/MathJax @echo "***********************************************************" @echo "** The MathJax plugin was successfully uninstalled. **" @echo "***********************************************************" install-jscalendar-plugin: @echo "***********************************************************" @echo "** Installing jsCalendar plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-jscalendar-plugin mkdir /tmp/invenio-jscalendar-plugin (cd /tmp/invenio-jscalendar-plugin && \ wget 'http://www.dynarch.com/static/jscalendar-1.0.zip' && \ unzip -u jscalendar-1.0.zip && \ mkdir -p ${prefix}/var/www/jsCalendar && \ cp jscalendar-1.0/img.gif ${prefix}/var/www/jsCalendar/jsCalendar.gif && \ cp jscalendar-1.0/calendar.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-setup.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/lang/calendar-en.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-blue.css ${prefix}/var/www/jsCalendar/) rm -fr /tmp/invenio-jscalendar-plugin @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully installed. **" @echo "***********************************************************" uninstall-jscalendar-plugin: @rm -rvf ${prefix}/var/www/jsCalendar @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully uninstalled. **" @echo "***********************************************************" install-js-test-driver: @echo "*******************************************************" @echo "** Installing js-test-driver, please wait... **" @echo "*******************************************************" mkdir -p $(prefix)/lib/java/js-test-driver && \ cd $(prefix)/lib/java/js-test-driver && \ wget http://invenio-software.org/download/js-test-driver/JsTestDriver-1.3.5.jar -O JsTestDriver.jar uninstall-js-test-driver: @rm -rvf ${prefix}/lib/java/js-test-driver @echo "*********************************************************" @echo "** The js-test-driver was successfully uninstalled. **" @echo "*********************************************************" install-jquery-plugins: @echo "***********************************************************" @echo "** Installing various jQuery plugins, please wait... 
**" @echo "***********************************************************" mkdir -p ${prefix}/var/www/js mkdir -p $(prefix)/var/www/css (cd ${prefix}/var/www/js && \ wget http://code.jquery.com/jquery-1.7.1.min.js && \ mv jquery-1.7.1.min.js jquery.min.js && \ wget http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.17/jquery-ui.min.js && \ wget http://invenio-software.org/download/jquery/v1.5/js/jquery.jeditable.mini.js && \ wget https://raw.github.com/malsup/form/master/jquery.form.js --no-check-certificate && \ wget http://jquery-multifile-plugin.googlecode.com/svn/trunk/jquery.MultiFile.pack.js && \ wget -O jquery.tablesorter.zip http://invenio-software.org/download/jquery/jquery.tablesorter.20111208.zip && \ wget http://invenio-software.org/download/jquery/uploadify-v2.1.4.zip -O uploadify.zip && \ wget http://www.datatables.net/download/build/jquery.dataTables.min.js && \ wget http://invenio-software.org/download/jquery/jquery.bookmark.package-1.4.0.zip && \ unzip jquery.tablesorter.zip -d tablesorter && \ rm jquery.tablesorter.zip && \ rm -rf uploadify && \ unzip -u uploadify.zip -d uploadify && \ wget http://flot.googlecode.com/files/flot-0.6.zip && \ wget -O jquery-ui-timepicker-addon.js http://invenio-software.org/download/jquery/jquery-ui-timepicker-addon-1.0.3.js && \ unzip -u flot-0.6.zip && \ mv flot/jquery.flot.selection.min.js flot/jquery.flot.min.js flot/excanvas.min.js ./ && \ rm flot-0.6.zip && rm -r flot && \ mv uploadify/swfobject.js ./ && \ mv uploadify/cancel.png uploadify/uploadify.css uploadify/uploadify.allglyphs.swf uploadify/uploadify.fla uploadify/uploadify.swf ../img/ && \ mv uploadify/jquery.uploadify.v2.1.4.min.js ./jquery.uploadify.min.js && \ rm uploadify.zip && rm -r uploadify && \ wget --no-check-certificate https://github.com/douglascrockford/JSON-js/raw/master/json2.js && \ wget http://invenio-software.org/download/jquery/jquery.hotkeys-0.8.js -O jquery.hotkeys.js && \ wget http://jquery.bassistance.de/treeview/jquery.treeview.zip && \ unzip jquery.treeview.zip -d jquery-treeview && \ rm jquery.treeview.zip && \ wget http://invenio-software.org/download/jquery/v1.5/js/jquery.ajaxPager.js && \ unzip jquery.bookmark.package-1.4.0.zip && \ rm -f jquery.bookmark.ext.* bookmarks-big.png bookmarkBasic.html jquery.bookmark.js jquery.bookmark.pack.js && \ mv bookmarks.png ../img/ && \ mv jquery.bookmark.css ../css/ && \ rm -f jquery.bookmark.package-1.4.0.zip && \ mkdir -p ${prefix}/var/www/img && \ cd ${prefix}/var/www/img && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/base/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/smoothness/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/ && \ wget --no-check-certificate -O datatables_jquery-ui.css https://github.com/DataTables/DataTables/raw/master/media/css/demo_table_jui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/jquery-ui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.17/demos/images/calendar.gif && \ wget -r -np -nH --cut-dirs=5 -A "png" http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/images/) @echo "***********************************************************" @echo "** The jQuery plugins were successfully installed. 
**" @echo "***********************************************************" uninstall-jquery-plugins: (cd ${prefix}/var/www/js && \ rm -f jquery.min.js && \ rm -f jquery.MultiFile.pack.js && \ rm -f jquery.jeditable.mini.js && \ rm -f jquery.flot.selection.min.js && \ rm -f jquery.flot.min.js && \ rm -f excanvas.min.js && \ rm -f jquery-ui-timepicker-addon.min.js && \ rm -f json2.js && \ rm -f jquery.uploadify.min.js && \ rm -rf tablesorter && \ rm -rf jquery-treeview && \ rm -f jquery.ajaxPager.js && \ rm -f jquery.form.js && \ rm -f jquery.dataTables.min.js && \ rm -f ui.core.js && \ rm -f jquery.bookmark.min.js && \ rm -f jquery.hotkeys.js && \ rm -f jquery.tablesorter.min.js && \ rm -f jquery-ui-1.7.3.custom.min.js && \ rm -f jquery.metadata.js && \ rm -f jquery-latest.js && \ rm -f jquery-ui.min.js) (cd ${prefix}/var/www/img && \ rm -f cancel.png uploadify.css uploadify.swf uploadify.allglyphs.swf uploadify.fla && \ rm -f datatables_jquery-ui.css \ rm -f bookmarks.png) && \ (cd ${prefix}/var/www/css && \ rm -f jquery.bookmark.css) @echo "***********************************************************" @echo "** The jquery plugins were successfully uninstalled. **" @echo "***********************************************************" install-ckeditor-plugin: @echo "***********************************************************" @echo "** Installing CKeditor plugin, please wait... **" @echo "***********************************************************" rm -rf ${prefix}/lib/python/invenio/ckeditor/ rm -rf /tmp/invenio-ckeditor-plugin mkdir /tmp/invenio-ckeditor-plugin (cd /tmp/invenio-ckeditor-plugin && \ wget 'http://invenio-software.org/download/ckeditor/$(CKEDITOR)' && \ unzip -u -d ${prefix}/var/www $(CKEDITOR)) && \ find ${prefix}/var/www/ckeditor/ -depth -name '_*' -exec rm -rf {} \; && \ find ${prefix}/var/www/ckeditor/ckeditor* -maxdepth 0 ! -name "ckeditor.js" -exec rm -r {} \; && \ rm -fr /tmp/invenio-ckeditor-plugin @echo "* Installing Invenio-specific CKeditor config..." (cd $(top_srcdir)/modules/webstyle/etc && make install) @echo "***********************************************************" @echo "** The CKeditor plugin was successfully installed. **" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR in invenio.conf. **" @echo "***********************************************************" uninstall-ckeditor-plugin: @rm -rvf ${prefix}/var/www/ckeditor @rm -rvf ${prefix}/lib/python/invenio/ckeditor @echo "***********************************************************" @echo "** The CKeditor plugin was successfully uninstalled. **" @echo "***********************************************************" install-pdfa-helper-files: @echo "***********************************************************" @echo "** Installing PDF/A helper files, please wait... **" @echo "***********************************************************" wget 'http://invenio-software.org/download/invenio-demo-site-files/ISOCoatedsb.icc' -O ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully installed. **" @echo "***********************************************************" install-mediaelement: @echo "***********************************************************" @echo "** MediaElement.js, please wait... 
**" @echo "***********************************************************" rm -rf /tmp/mediaelement mkdir /tmp/mediaelement wget 'http://github.com/johndyer/mediaelement/zipball/master' -O '/tmp/mediaelement/mediaelement.zip' --no-check-certificate unzip -u -d '/tmp/mediaelement' '/tmp/mediaelement/mediaelement.zip' rm -rf ${prefix}/var/www/mediaelement mkdir ${prefix}/var/www/mediaelement mv /tmp/mediaelement/johndyer-mediaelement-*/build/* ${prefix}/var/www/mediaelement rm -rf /tmp/mediaelement @echo "***********************************************************" @echo "** MediaElement.js was successfully installed. **" @echo "***********************************************************" install-bootstrap: @echo "***********************************************************" @echo "** Installing Twitter Bootstrap, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-bootstrap mkdir /tmp/invenio-bootstrap (cd /tmp/invenio-bootstrap && \ wget -O bootstrap.zip 'http://invenio-software.org/download/bootstrap/bootstrap-${BOOTSTRAPV}.zip' && \ unzip -u bootstrap.zip && \ cp bootstrap/css/bootstrap-responsive.css ${prefix}/var/www/css/bootstrap-responsive.css && \ cp bootstrap/css/bootstrap-responsive.min.css ${prefix}/var/www/css/bootstrap-responsive.min.css && \ cp bootstrap/css/bootstrap.css ${prefix}/var/www/css/bootstrap.css && \ cp bootstrap/css/bootstrap.min.css ${prefix}/var/www/css/bootstrap.min.css && \ cp bootstrap/img/glyphicons-halflings-white.png ${prefix}/var/www/img/glyphicons-halflings-white.png && \ cp bootstrap/img/glyphicons-halflings.png ${prefix}/var/www/img/glyphicons-halflings.png && \ cp bootstrap/js/bootstrap.js ${prefix}/var/www/js/bootstrap.js && \ cp bootstrap/js/bootstrap.min.js ${prefix}/var/www/js/bootstrap.min.js && \ rm -fr /tmp/invenio-bootstrap ) @echo "***********************************************************" @echo "** The Twitter Bootstrap was successfully installed. **" @echo "***********************************************************" uninstall-bootstrap: rm ${prefix}/var/www/css/bootstrap-responsive.css && \ rm ${prefix}/var/www/css/bootstrap-responsive.min.css && \ rm ${prefix}/var/www/css/bootstrap.css && \ rm ${prefix}/var/www/css/bootstrap.min.css && \ rm ${prefix}/var/www/img/glyphicons-halflings-white.png && \ rm ${prefix}/var/www/img/glyphicons-halflings.png && \ rm ${prefix}/var/www/js/bootstrap.js && \ rm ${prefix}/var/www/js/bootstrap.min.js @echo "***********************************************************" @echo "** The Twitter Bootstrap was successfully uninstalled. **" @echo "***********************************************************" install-hogan-plugin: @echo "***********************************************************" @echo "** Installing Hogan.js, please wait... **" @echo "***********************************************************" rm -rf /tmp/hogan mkdir /tmp/hogan (cd /tmp/hogan && \ wget -O hogan-${HOGANVER}.js 'http://twitter.github.com/hogan.js/builds/${HOGANVER}/hogan-${HOGANVER}.js' && \ cp hogan-${HOGANVER}.js ${prefix}/var/www/js/hogan.js && \ rm -fr /tmp/hogan ) @echo "***********************************************************" @echo "** Hogan.js was successfully installed. **" @echo "***********************************************************" uninstall-hogan-plugin: rm ${prefix}/var/www/js/hogan.js @echo "***********************************************************" @echo "** Hogan.js was successfully uninstalled. 
**" @echo "***********************************************************" install-jquery-tokeninput: @echo "***********************************************************" @echo "** Installing JQuery Tokeninput, please wait... **" @echo "***********************************************************" rm -rf /tmp/jquery-tokeninput mkdir /tmp/jquery-tokeninput (cd /tmp/jquery-tokeninput && \ wget -O jquery-tokeninput-master.zip 'https://github.com/loopj/jquery-tokeninput/archive/master.zip' --no-check-certificate && \ unzip -u jquery-tokeninput-master.zip && \ cp jquery-tokeninput-master/styles/token-input-facebook.css ${prefix}/var/www/css/token-input-facebook.css && \ cp jquery-tokeninput-master/styles/token-input-mac.css ${prefix}/var/www/css/token-input-mac.css && \ cp jquery-tokeninput-master/styles/token-input.css ${prefix}/var/www/css/token-input.css && \ cp jquery-tokeninput-master/src/jquery.tokeninput.js ${prefix}/var/www/js/jquery.tokeninput.js && \ rm -fr /tmp/jquery-tokeninput ) @echo "***********************************************************" @echo "** The JQuery Tokeninput was successfully installed. **" @echo "***********************************************************" uninstall-jquery-tokeninput: rm ${prefix}/var/www/css/token-input-facebook.css && \ rm ${prefix}/var/www/css/token-input-mac.css && \ rm ${prefix}/var/www/css/token-input.css && \ rm ${prefix}/var/www/js/jquery.tokeninput.js @echo "***********************************************************" @echo "** The JQuery Tokeninput was successfully uninstalled. **" @echo "***********************************************************" install-plupload-plugin: @echo "***********************************************************" @echo "** Installing Plupload plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/plupload-plugin mkdir /tmp/plupload-plugin (cd /tmp/plupload-plugin && \ wget -O plupload-plugin.zip 'http://plupload.com/downloads/plupload_1_5_5.zip' && \ unzip -u plupload-plugin.zip && \ mkdir -p ${prefix}/var/www/js/plupload/i18n/ && \ cp -R plupload/js/jquery.plupload.queue ${prefix}/var/www/js/plupload/ && \ cp -R plupload/js/jquery.ui.plupload ${prefix}/var/www/js/plupload/ && \ cp plupload/js/plupload.browserplus.js ${prefix}/var/www/js/plupload/plupload.browserplus.js && \ cp plupload/js/plupload.flash.js ${prefix}/var/www/js/plupload/plupload.flash.js && \ cp plupload/js/plupload.flash.swf ${prefix}/var/www/js/plupload/plupload.flash.swf && \ cp plupload/js/plupload.full.js ${prefix}/var/www/js/plupload/plupload.full.js && \ cp plupload/js/plupload.gears.js ${prefix}/var/www/js/plupload/plupload.gears.js && \ cp plupload/js/plupload.html4.js ${prefix}/var/www/js/plupload/plupload.html4.js && \ cp plupload/js/plupload.html5.js ${prefix}/var/www/js/plupload/plupload.html5.js && \ cp plupload/js/plupload.js ${prefix}/var/www/js/plupload/plupload.js && \ cp plupload/js/plupload.silverlight.js ${prefix}/var/www/js/plupload/plupload.silverlight.js && \ cp plupload/js/plupload.silverlight.xap ${prefix}/var/www/js/plupload/plupload.silverlight.xap && \ cp plupload/js/i18n/*.js ${prefix}/var/www/js/plupload/i18n/ && \ rm -fr /tmp/plupload-plugin ) @echo "***********************************************************" @echo "** The Plupload plugin was successfully installed. 
**" @echo "***********************************************************" uninstall-plupload-plugin: rm -rf ${prefix}/var/www/js/plupload @echo "***********************************************************" @echo "** The Plupload was successfully uninstalled. **" @echo "***********************************************************" uninstall-pdfa-helper-files: rm -f ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully uninstalled. **" @echo "***********************************************************" #Solrutils allows automatic installation, running and searching of an external Solr index. install-solrutils: @echo "***********************************************************" @echo "** Installing Solrutils and solr, please wait... **" @echo "***********************************************************" cd $(prefix)/lib && \ if test -d apache-solr*; then echo A solr directory already exists in `pwd` . \ Please remove it manually, if you are sure it is not needed; exit 2; fi ; \ if test -f apache-solr*; then echo solr tarball already exists in `pwd` . \ Please remove it manually.; exit 2; fi ; \ wget http://archive.apache.org/dist/lucene/solr/3.1.0/apache-solr-3.1.0.tgz && \ tar -xzf apache-solr-3.1.0.tgz && \ rm apache-solr-3.1.0.tgz cd $(solrdir)/contrib/ ;\ wget http://mirrors.ibiblio.org/pub/mirrors/maven2/com/jcraft/jzlib/1.0.7/jzlib-1.0.7.jar && \ cd $(solrdir)/contrib/ ;\ jar -xf ../example/webapps/solr.war WEB-INF/lib/lucene-core-3.1.0.jar ; \ if test -d basic-lucene-libs; then rm -rf basic-lucene-libs; fi ; \ mv WEB-INF/lib/ basic-lucene-libs ; \ cp $(solrutils_dir)/schema.xml $(solrdir)/example/solr/conf/ cp $(solrutils_dir)/solrconfig.xml $(solrdir)/example/solr/conf/ cd $(solrutils_dir) && \ javac -classpath $(CLASSPATH) -d $(solrdir)/contrib @$(solrutils_dir)/java_sources.txt && \ cd $(solrdir)/contrib/ && \ jar -cf invenio-solr.jar org/invenio_software/solr/*class update-v0.99.0-tables: cat $(top_srcdir)/modules/miscutil/sql/tabcreate.sql | grep -v 'INSERT INTO upgrade' | ${prefix}/bin/dbexec echo "DROP TABLE IF EXISTS oaiREPOSITORY;" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD COLUMN more_info mediumblob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD COLUMN priority tinyint(4) NOT NULL default 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD KEY priority (priority);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA DROP PRIMARY KEY;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD PRIMARY KEY (id);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA CHANGE id id mediumint(8) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD UNIQUE KEY object_name (object_name);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE value value text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmAPPROVAL ADD note text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstDOCUMENT CHANGE docsize docsize bigint(15) unsigned NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtACTIONHISTORY CHANGE client_host client_host int(10) unsigned default NULL;" | ${prefix}/bin/dbexec update-v0.99.1-tables: @echo "Nothing to do; table structure did not change between v0.99.1 and v0.99.2." update-v0.99.2-tables: @echo "Nothing to do; table structure did not change between v0.99.2 and v0.99.3." 
update-v0.99.3-tables: @echo "Nothing to do; table structure did not change between v0.99.3 and v0.99.4." update-v0.99.4-tables: @echo "Nothing to do; table structure did not change between v0.99.4 and v0.99.5." update-v0.99.5-tables: @echo "Nothing to do; table structure did not change between v0.99.5 and v0.99.6." update-v0.99.6-tables: @echo "Nothing to do; table structure did not change between v0.99.6 and v0.99.7." -update-v0.99.7-tables: # from v0.99.7 to v1.0.0-rc0 +update-v0.99.7-tables: + @echo "Nothing to do; table structure did not change between v0.99.7 and v0.99.8." + +update-v0.99.8-tables: # from v0.99.8 to v1.0.0-rc0 echo "RENAME TABLE oaiARCHIVE TO oaiREPOSITORY;" | ${prefix}/bin/dbexec cat $(top_srcdir)/modules/miscutil/sql/tabcreate.sql | grep -v 'INSERT INTO upgrade' | ${prefix}/bin/dbexec echo "INSERT INTO knwKB (id,name,description,kbtype) SELECT id,name,description,'' FROM fmtKNOWLEDGEBASES;" | ${prefix}/bin/dbexec echo "INSERT INTO knwKBRVAL (id,m_key,m_value,id_knwKB) SELECT id,m_key,m_value,id_fmtKNOWLEDGEBASES FROM fmtKNOWLEDGEBASEMAPPINGS;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE name name varchar(40) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE docname docname varchar(250) COLLATE utf8_bin NOT NULL default 'file';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE status status text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD COLUMN text_extraction_date datetime NOT NULL default '0000-00-00';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection DROP COLUMN restricted;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bib85x DROP INDEX kv, ADD INDEX kv (value(100));" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/HEP.rdf' WHERE name='HEP' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/NASA-subjects.rdf' WHERE name='NASA-subjects' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='runoairepository', description='run oairepositoryupdater task' WHERE name='runoaiarchive';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='cfgoaiharvest', description='configure OAI Harvest' WHERE name='cfgbibharvest';" | ${prefix}/bin/dbexec echo "ALTER TABLE accARGUMENT CHANGE value value varchar(255);" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='doctype,act,categ' WHERE name='submit';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('categ','*');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='submit' and accARGUMENT.keyword='categ' and accARGUMENT.value='*';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='name,with_editor_rights' WHERE name='cfgwebjournal';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('with_editor_rights','yes');" | ${prefix}/bin/dbexec echo "INSERT INTO 
accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='cfgwebjournal' and accARGUMENT.keyword='with_editor_rights' and accARGUMENT.value='yes';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC CHANGE id id int(15) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD external_id int(15) NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD collection_id int(15) unsigned NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD original_url text;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD status char(2) NOT NULL default 'ok';" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY status (status);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Photos_to_Storage','Attach/edit the pictures uploaded with the \"create_photos_manager_interface()\" function');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Photos',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a photos upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Photos_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\n\r\nfrom invenio.websubmit_functions.ParamFile import ParamFromFile\r\nfrom invenio.websubmit_functions.Move_Photos_to_Storage import read_param_file, create_photos_manager_interface, get_session_id\r\n\r\n# Retrieve session id\r\ntry:\r\n # User info is defined only in MBI/MPI actions...\r\n session_id = get_session_id(None, uid, user_info) \r\nexcept:\r\n session_id = get_session_id(req, uid, {})\r\n\r\n# Retrieve context\r\nindir = curdir.split(\'/\')[-3]\r\ndoctype = curdir.split(\'/\')[-2]\r\naccess = curdir.split(\'/\')[-1]\r\n\r\n# Get the record ID, if any\r\nsysno = ParamFromFile(\"%s/%s\" % (curdir,\'SN\')).strip()\r\n\r\n\"\"\"\r\nModify below the configuration of the photos manager interface.\r\nNote: \'can_reorder_photos\' parameter is not yet fully taken into consideration\r\n\r\nDocumentation of the function is available by running:\r\necho -e \'from invenio.websubmit_functions.Move_Photos_to_Storage import create_photos_manager_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext += create_photos_manager_interface(sysno, session_id, uid,\r\n doctype, indir, curdir, access,\r\n can_delete_photos=True,\r\n can_reorder_photos=True,\r\n can_upload_photos=True,\r\n editor_width=700,\r\n editor_height=400,\r\n initial_slider_value=100,\r\n max_slider_value=200,\r\n min_slider_value=80)','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. 
More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nfrom invenio.websubmit_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.websubmit_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','forceFileRevision');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Upload_Files_Interface','Display generic interface to add/revise/delete files. To be used before function \"Move_Uploaded_Files_to_Storage\"');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Uploaded_Files_to_Storage','Attach files uploaded with \"Create_Upload_Files_Interface\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','elementNameToDoctype');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','keepPreviousVersionDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Revised_Files_to_Storage','Revise files initially uploaded with \"Move_Files_to_Storage\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','minsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','doctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDeleteDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canReviseDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDescribeDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canCommentDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canKeepDoctypes');" | ${prefix}/bin/dbexec echo 
"INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canAddFormatDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRestrictDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canNameNewFiles');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','keepDefault');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','showLinks');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','fileLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','filenameLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','descriptionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','commentLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','startDoc');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','endDoc');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','defaultFilenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxFilesDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','nblength');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_nb_length');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Get_Recid','record_search_pattern');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_FCKeditor_Files_to_Storage','Transfer files attached to the record with the FCKeditor');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_FCKeditor_Files_to_Storage','input_fields');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_doctypes');" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD round_name varchar(255) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD restriction varchar(50) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL 
default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD KEY in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo -e 'from invenio.webcommentadminlib import migrate_comments_populate_threads_index;\ migrate_comments_populate_threads_index()' | $(PYTHON) echo -e 'from invenio.access_control_firerole import repair_role_definitions;\ repair_role_definitions()' | $(PYTHON) CLEANFILES = *~ *.pyc *.tmp diff --git a/NEWS b/NEWS index a2397db70..a5e695169 100644 --- a/NEWS +++ b/NEWS @@ -1,1713 +1,1868 @@ Invenio NEWS ============ Here is a short summary of the most notable changes in Invenio releases. For more information about the current release, please consult RELEASE-NOTES. For more information about changes, please consult ChangeLog. +Invenio v1.1.2 -- released 2013-08-19 +------------------------------------- + + *) BibAuthorID: fix in name comparisons (#1313 #1314); improvements + and fixes; improvements, fixes and optimizations; UI and backend + improvements + + *) BibCatalog: removal of print statement (#1337) + + *) BibClassify: escape keywords in tag cloud and MARCXML + + *) BibDocFile: better JS washing in web UI; display file upload + progress (#1020 #1021); display "Restricted" label correctly + (#1299); fix check-md5 with bibdocfsinfo cache (#1249); fix + check-md5 with bibdocfsinfo cache (#1249); fix error in calling + register_download (#1311); handling of exceptions in Md5Folder + (#1060); revert md5 property patch (#1249); support new magic + library (#1207) + + *) BibEncode: minor fix in process_batch_job() + + *) BibFormat: additional fulltext file display in HB (#1219); checks + for bibformat bin; fix CLI call to old PHP-based formatter; fixes + unit tests (#1320); fix for fulltext file format; fix snippets for + phrase queries (#1201); format_element initialisation fix; passing + of user_info for Excel format; replacement of CDS Invenio by + Invenio; setUp/tearDown in unit tests (#1319); skip hidden icons + in OpenGraph image tag + + *) BibIndex: better wording for stemming in admin UI; replacement of + CDS Invenio by Invenio; synonym indexing speed up (#1484); use + human friendly index name (#1329) + + *) BibKnowledge: /kb/export 500 error fix; optional memoisation of + KBR lookups (#1484) + + *) BibMerge: delete cache file on submit + + *) BibSched: bibupload max_priority check; bugfix for high-priority + monotasks; increases size of monitor columns; + parse_runtime_limit() fix (#1432); parse_runtime_limit() tests fix + (#1432) + + *) BibUpload: FMT regression test case fix (#1152); indicators in + strong tags (#939) + + *) CKEditor: updated to version 3.6.6 + + *) dateutils: strftime improvement (#1065); strptime for Python-2.4 + compatibility + + *) errorlib: hiding bibcatalog info in exception body + + *) global: test suite 
nosification + + *) htmlutils: fix single quote escaping; improve js string escaping; + MathJax 2.1 (#1050) + + *) I18N: updates to Catalan and Spanish translations + + *) installation: fix collectiondetailedrecordpagetabs (#1496); fix + for jQuery hotkeys add-on URL (#1507); fix for MathJax OS X + install issue (#1455); support for Apache-2.4 (#1552) + + *) inveniocfg: tests runner file closure fix (#1327) + + *) InvenioConnector: fix for CDS authentication; mechanize dependency + + *) inveniogc: consider journal cache subdirs + + *) memoiseutils: initial release + + *) OAIHarvest: fix path for temporary authorlists; holding-pen UI + bugfixes (#1401) + + *) OAIRepository: CFG_OAI_REPOSITORY_MARCXML_SIZE; no bibupload -n + + *) RefExtract: replacement of CDS Invenio by Invenio + + *) WebAccess: fix variable parsing in robot auth (#1456); IP-based + rules and offline user fix (#1233); replacement of CDS Invenio by + Invenio + + *) WebApiKey: renames unit tests to regression tests (#1324) + + *) WebAuthorProfile: fix XSS vulnerability + + *) WebComment: escape review "title" + + *) WebSearch: 410 HTTP code for deleted records; advanced search + notification if no hits; better cleaning of word patterns; fix + infinite synonym lookup cases (#804); handles "find feb 12" + (#948); nicer browsing of fuzzy indexes (#1348); respect default + `rg` in Advanced Search; SPIRES date math search fixes (#431 + #948); SPIRES invalid date search fix (#1467); tweaks SPIRES + two-digit search; unit test disabling for CFG_CERN_SITE; unit test + update (#1326) + + *) WebSession: fix for list of admin activities (#1444); login_method + changes; unit vs regression test suite cleanup + + *) WebStat: use CFG_JOURNAL_TAG instead of 773/909C4 (#546) + + *) WebSubmit: new websubmitadmin CLI (#1334); replacement of CDS + Invenio by Invenio + +Invenio v1.0.5 -- released 2013-08-19 +------------------------------------- + + *) BibClassify: escape keywords in tag cloud and MARCXML + + *) BibDocFile: support new magic library + + *) BibFormat: additional fulltext file display in HB; fix CLI call to + old PHP-based formatter; format_element initialisation fix + + *) BibIndex: better wording for stemming in admin UI + + *) BibKnowledge: /kb/export 500 error fix + + *) BibUpload: FMT regression test case fix; indicators in strong tags + + *) errorlib: hiding bibcatalog info in exception body + + *) global: test suite nosification + + *) installation: fix collectiondetailedrecordpagetabs; support for + Apache-2.4 + + *) WebAccess: IP-based rules and offline user fix; replacement of CDS + Invenio by Invenio + + *) WebComment: escape review "title" + + *) WebSearch: respect default `rg` in Advanced Search + + *) WebSession: fix for list of admin activities; login_method changes + + *) WebSubmit: new websubmitadmin CLI + +CDS Invenio v0.99.8 -- released 2013-08-19 +------------------------------------------ + + *) escape keywords in tag cloud and MARCXML (BibClassify) + + *) fix CLI call to old PHP-based formatter; fix format_element + initialisation (BibFormat) + + *) better wording for stemming in admin UI (BibIndex) + + *) IP-based rules and offline user fix (WebAccess) + + *) escape review "title" (WebComment) + + *) fix collectiondetailedrecordpagetabs (installation) + Invenio v1.1.1 -- released 2012-12-21 ------------------------------------- *) BatchUploader: error reporting improvements *) BibAuthorID: arXiv login upgrade; fix for small bug in claim interface *) BibConvert: fix bug with SPLITW function; target/source
CLI flag description fix *) BibDocFile: better error report for unknown format; explicit redirection to secure URL; fix for file upload in submissions *) BibEdit: 'bibedit' CSS class addition to page body *) BibFormat: clean Default_HTML_meta template; fix for js_quicktags location; ISBN tag update for meta format; "ln" parameter in bfe_record_url output; meta header output fix; relator code filter in bfe_authors; fix for reformatting by record IDs *) errorlib: register_exception improvements *) global: login link using absolute URL redirection *) installation: aidUSERINPUTLOG consistency upgrade; bigger hstRECORD.marcxml size; fix for wrong name in tabcreate; inclusion of JS quicktags in tarball; mark upgrade recipes as applied; rephrase 1.1 upgrade recipe warning; safer upgrader bibsched status parse; strip spaces in CFG list values *) jQuery: tablesorter location standardisation *) mailutils: authentication and TLS support *) OAIRepository: Edit OAI Set page bug fix; fix for OAI set editing; print_record() fixes *) plotextractor: washing of captions and context *) pluginutils: fix for failing bibformat test case *) solrutils: addition of files into release tarball *) WebAccess: admin interface usability improvement; guest unit tests for firerole *) WebAlert: new regression tests for alerts *) WebComment: cleaner handling of non-reply comments *) WebJournal: better language handling in widgets; CERN-specific translation; explicit RSS icon dimensions; fix for CFG_TMPSHAREDDIR; fix for retrieval of deleted articles; search select form by name *) WebSearch: fix for webcoll grid layout markup; get_all_field_values() typo; next-hit/previous-hit numbering fix; respect output format content-type; washing of 'as' argument *) WebSession: fix for login-with-referer issue; fix for merge_usera_into_userb() *) WebStyle: dumb page loading fix Google Analytics documentation update; memory leak fix in session handling; new /ping handler; removal of excess language box call; req.is_https() fix; *) WebSubmit: display login link on /submit page; fix for Send_APP_Mail function; fix the approval URL for publiline *) WebUser: fix for referer URL protocol Invenio v1.0.4 -- released 2012-12-21 ------------------------------------- *) installation: inclusion of JS quicktags in tarball *) bibdocfile: better error report for unknown format *) WebAccess: admin interface usability improvement Invenio v1.0.3 -- released 2012-12-19 ------------------------------------- *) BatchUploader: error reporting improvements *) BibConvert: fix bug with SPLITW function; target/source CLI flag description fix *) BibEdit: 'bibedit' CSS class addition to page body *) BibFormat: fix for js_quicktags location *) jQuery: tablesorter location standardisation *) WebComment: cleaner handling of non-reply comments *) WebJournal: explicit RSS icon dimensions; fix for CFG_TMPSHAREDDIR; fix for retrieval of deleted articles *) WebSearch: external search pattern_list escape fix; respect output format content-type; washing of 'as' argument *) WebStyle: dumb page loading fix; Google Analytics documentation update; memory leak fix in session handling; new /ping handler; removal of excess language box call; req.is_https() fix *) WebSubmit: fix for Send_APP_Mail function *) WebUser: fix for referer URL protocol CDS Invenio v0.99.7 -- released 2012-12-18 ------------------------------------------ *) Google Analytics documentation update (WebStyle) *) target/source CLI flag description fix (BibConvert) Invenio v1.1.0 -- released 2012-10-21 
------------------------------------- *) BatchUploader: RESTful interface, runtime checks, TextMARC input, job priority selection *) BibAuthorID: new automatic author disambiguation and paper claiming facility *) BibCatalog: storage of ticket requestor, default RT user *) BibCirculation: security fixes *) BibClassify: UI improvements and refactoring *) BibConvert: new BibTeX-to-MARCXML conversion, new oaidmf2marcxml conversion, fixes for WORDS *) BibDocFile: new filesystem cache for faster statistics, caseless authorisation, disable HTTP range requests, improve file format policies, and more *) BibEdit: new options related to preview and printing, reference curation, autocompletion, record and field template manager, editing fields and subfields, per-collection authorisations, use of knowledge bases, and more *) BibEditMulti: new actions with conditions on fields, partial matching for subfields, faster preview generation, and more *) BibEncode: new audio and video media file processing tool, new Video demo collection *) BibFormat: new full-text snippet display facility, new configuration for I18N caching, updates to EndNote, Excel, Dublin Core and other formats, updates to formatting elements such as DOI, author, updates to podcast output, updates to XSLT processing, and more *) OAIHarvest: new configurable workflow with reference extraction, new author list extraction post process, upload priority, OpenAIRE compliance, better handling of timeouts, and more *) BibIndex: new full-text indexing via Solr, new support for author ID indexing, better author tokeniser *) BibKnowledge: dynamic knowledge bases for record editor, support for JSON format *) BibMatch: new matching of restricted collections *) BibMerge: subfield order in slave record, confirmation pop up, record selection bug fix *) BibRank: new index term count ranking method, new support for flot graphs, updates to citation graphs *) BibRecord: new possibility to use lxml parser, sanity checks *) BibSched: new motd-like facility for queue monitor, new continuable error status for tasks, new tasklet framework, new multi-node support, new monotask support, new support for task sequences, improvements to scheduling algorithm *) BibSort: new in-memory fast sorting tool using configurable buckets *) BibUpload: new automatic generation of MARC tag 005, new `--callback-url' CLI parameter, fixes for appending existing files, fixes for multiple 001 tags, and more *) WebAccess: new external person ID support, performance improvements, robot manager UI improvements, fixes for firerole handling, *) WebAlert: new alert description facility, fixes for restricted collections *) WebApiKey: new user-signed Web API key facility *) WebAuthorProfile: new author pages with dynamic box layout *) WebBasket: add to basket interface improvements, better XML export, fixes for external records and other improvements *) WebComment: new collapsible comment support, new permalink to comments, loss prevention of unsubmitted comments, tidying up HTML markup of comments, and more *) WebJournal: new Open Graph markup, more customisable newsletter, redirect to latest release of specific category, refresh chosen collections on release, remove unnecessary encoding/decoding, update weather widget for new APIs, and more *) WebSearch: new index-time and search-time synonym support, new Open Graph markup, new Google Scholar friendly metadata in page header, new limit option for wildcard queries, new support for access to merged records, new next/previous/back link support, 
new `authorcount' indexing and searching, new relative date search facility, clean OpenSearch support, improved speed, improvements to SPIRES query syntax support, improvements to self-cite math, primary collection guessing, other numerous fixes *) WebSession: new useful guest sessions, reintroduces configurable IP checking, enforcement of nickname refresh, several other fixes *) WebStat: new login statistics, new custom query summary, error analyser, custom event improvements *) WebStyle: new display restriction flag for restricted records, new initial right-to-left language support, authenticated user and HTTPS support, IP check for proxy configurations, layout updates and fixes for MSIE, and more *) WebSubmit: new initial support for converting to PDF/X, new embargo support, better LibreOffice compatibility, better async file upload, enhancements for Link_Records, support for hiding HIDDEN files in document manager, configurable initial value for counter, make use of BibSched task sequences, and more *) installation: updates to jQuery, CKEditor, unoconv, and other prerequisites *) dbdump: new compression support, reworked error handling *) dbquery: new possibility to query DB slave nodes, new dict-like output, fix for MySQL 5.5.3 and higher versions *) errorlib: stack analysis improvements, outline style improvements for invenio.err *) htmlutils: improvements to HTML markup removal, HTML tidying *) I18N: new Arabic and Lithuanian translations, updates to Catalan, Czech, French, German, Greek, Italian, Russian, Slovak, Spanish translations *) intbitset: new performance improvements, new get item support, new pickle support, several memory leak fixes *) inveniocfg: new automated Invenio Upgrader tool *) InvenioConnector: new search with retries, improved search parameters, improved local site check, use of Invenio user agent *) jsonutils: new JSON utility library *) mailutils: possibility to specify Reply-To header, fixes to multipart *) plotextractor: better TeX detection, better PDF harvesting from arXiv, configurable sleep timer *) pluginutils: new create_enhanced_plugin_builder API, external plugin loading *) RefExtract: new daemon operation mode, new DOI recognition, better author recognition, new author knowledge base *) remote debugger: new remote debuggng support *) sequtils: new sequence generator tool *) solrutils: new support for full-text query dispatching to Solr *) testutils: new Selenium web test framework *) textutils: updates to string-to-ascii functions, LaTeX symbols to Unicode *) urlutils: fix for redirect_to_url *) xmlmarclint: fix for error report formatting *) ... 
and other numerous smaller fixes and improvements Invenio v1.0.2 -- released 2012-10-19 ------------------------------------- *) BibConvert: fix for static files in admin guide *) BibEdit: regression test case fix *) BibFormat: fix call to bfe_primary_report_number; revert fix for format validation report *) BibHarvest: OAI harvesting via HTTP proxy *) BibRank: begin_date initialisation in del_recids(); INSERT DELAYED INTO rnkPAGEVIEWS; user-friendlier message for similar docs *) BibUpload: clarify correct/replace mode help *) WebJournal: catch ValueError when reading cache; use CFG_TMPSHAREDDIR in admin UI *) WebSearch: allow webcoll to query hidden tags; external collection search fix; external search XSS vulnerability fix; fix for parentheses inside quotes; get_collection_reclist() fix; more uses of `rg` configurable default; 'verbose' mode available to admins only; XSS and verbose improvements *) WebSession: fix possibly undefined variables; prevent nickname modification *) WebStyle: workaround IE bug with cache and HTTPS *) WebSubmit: configurable Document File Manager; fix JS check for mandatory fields; unoconv calling fix *) bibdocfile: guess_format_from_url() improvement; guess_format_from_url() improvements; INSERT DELAYED INTO rnkDOWNLOADS *) global: removal of psyco *) I18N: Spanish and Catalan updates to Search Tips; updates to German translation *) installation: fix for jQuery UI custom; fix md5sum example arguments; new index on session.session_expiry *) intbitset: fix memory leak *) inveniogc: tmp directory removal improvements *) urlutils: MS Office redirection workaround CDS Invenio v0.99.6 -- released 2012-10-18 ------------------------------------------ *) improved XSS safety in external collection searching (WebSearch) *) verbose level in the search results pages is now available only to admins, preventing potential restricted record ID disclosure even though record content would remain restricted (WebSearch) Invenio v1.0.1 -- released 2012-06-28 ------------------------------------- *) BibFormat: fix format validation report; fix opensearch prefix exclusion in RSS; fix retrieval of collection identifier *) BibIndex: new unit tests for the Greek stemmer *) BibSched: improve low level submission arg parsing; set ERROR status when wrong params; task can stop immediately when sleeping *) BibSword: remove dangling documentation *) BibUpload: fix setting restriction in -a/-ir modes *) WebAlert: simplify HTML markup *) WebComment: only logged users to use report abuse *) WebJournal: hide deleted records *) WebSearch: adapt test cases for citation summary; fix collection order on the search page; look at access control when webcolling; sorting in citesummary breakdown links *) WebSession: simplify HTML markup *) WebSubmit: capitalise doctypes in Doc File Manager; check authorizations in endaction; check for problems when archiving; ensure unique tmp file name for upload; fix email formatting; fix Move_to_Done function; remove 8564_ field from demo templates; skip file upload if necessary; update CERN-specific config *) bibdocfile: BibRecDocs recID argument type check *) data cacher: deletes cache before refilling it *) dbquery: fix dbexec CLI WRT max allowed packet *) I18N: updates to Greek translation *) installation: fix circular install-jquery-plugins; fix demo user initialisation; fix jQuery tablesorter download URL; fix jQuery uploadify download URL; more info about max_allowed_packet; remove unneeded rxp binary package Invenio v1.0.0 -- released 2012-02-29 
------------------------------------- *) BatchUploader: fix retrieval of recs from extoaiid *) BibCirculation: fix regexp for dictionary checking; security check before eval *) BibConvert: fix UP and DOWN for UTF-8 strings *) bibdocfile: add missing normalize_format() calls; check_bibdoc_authorization caseless; fix append WRT description/restriction; fix cli_set_batch function; fix documentation WRT --with-version; fix handling of embargo firerole rule; fix parsing of complex subformats *) BibEdit: fix crash in Ajax request; fix undefined dictionary key *) BibFormat: better escape BFE in admin test UI; do not exit if no XSLT processor found; fix regression test; fix URL to ejournal resolver; fix XSLT formatting of MARCXML snippets; removes 'No fulltext' message; special handling of INSPIRE-PUBLIC type; use default namespace in XSL *) BibHarvest: check for empty resumptionToken; fix MARCXML creation in OAI updater; optional JSON dependency *) BibIndex: fix author:Campbell-Wilson word query; fix double-stemming upon indexing; fix Porter stemmer in multithread; Greek stemmer improvements *) BibKnowledge: make XML/XSLT libs optional *) BibRank: CERN hack to inactivate similarity lists; fix citation indexer time stamp updating; fix citation indexing of deleted records; fix citedby/refersto for infinite sets; fix empty citation data cacher; fix incremental citation indexer leaks; make numpy optional; minimum x-axis in citation history graphs; run citation indexer after word indexer *) BibRecord: fix for record_get_field_instances() *) BibSched: fix guess_apache_process_user_from_ps; use larger timouts for launching tasks *) BibUpload: FFT regression tests not to use CDS *) htmlutils: fix FCKeditor upload URLs *) installation: add note about optional hashlib; change table TYPE to ENGINE in SQL; fix 'install-mathjax-plugin'; fix issue with FCKeditor; fix 'make install-jquery-plugins'; fix output message cosmetics; new 'make install-ckeditor-plugin'; re-enable WSGI pre-loading *) intbitset: fix never ending loop in __repr__; fix several memory leaks *) inveniocfg: fix resetting ranking method names *) inveniogc: new CLI options check/optimise tables *) kwalitee: grep-like output and exit status changes; use `--check-some` as default CLI option *) mailutils: remove unnecessary 'multipart/related' *) plotextractor: fix INSPIRE unit test *) textmarc2xmlmarc: fix handling of BOM *) urlutils: new Indico request generator helper *) WebAccess: fix Access policy page; fix FireRole handling integer uid; fix retrieving emails from firerole *) WebAlert: fix the display of records in alerts *) WebBasket: fix missing return statement; fix number of items in public baskets *) WebComment: CERN-specific hack for ATLAS comments; fix discussion display in bfe_comments; fix washing of email to admin; improve sanity checks *) WebHelp: HOWTO MARC document update *) WebJournal: fix seminar widget encoding issue; fix seminar widget for new Indico APIs; update weather widget for new APIs *) WebSearch: add refersto:/a b c/ example to guide; CERN-specific hack for journal sorting; CERN-specific hack for latest additions; fix case-insensitive collection search; fix CDSIndico external search; fix collection translation in admin UI; fix get_fieldvalues() when recid is str; fix get_index_id_from_field(); fix structured regexp query parsing; fix symbol name typo in loop checking; parenthesised collection definitions; remove accent-search warning in guide; remove Report for INSPIRE author pages; replace CDS Indico by Indico; 
updates some output phrases *) WebSession: fix crash when no admin user exists *) WebStyle: better service failure message; fix implementation of req.get_hostname; fluid width of the menu; pre-load citation dictionaries for web *) WebSubmit: avoid printing empty doctype section; check_user_can_view_record in publiline; fix filename bug in document manager; fix handling of uploaded files; fix record_search_pattern in DEMOJRN *) xmlmarclint: 'no valid record detected' error *) I18N: updates to Catalan, Czech, French, German, Greek, Italian, Slovak, and Spanish translations *) Note: for a complete list of new features in Invenio v1.0 release series over Invenio v0.99 release series, please see: CDS Invenio v0.99.5 -- released 2012-02-21 ------------------------------------------ *) improved sanity checks when reporting, voting, or replying to a comment, or when accessing comment attachments, preventing URL mangling attempts (WebComment) CDS Invenio v0.99.4 -- released 2011-12-19 ------------------------------------------ *) fixed double stemming during indexing (BibIndex) *) fixed collection translation in admin UI (WebSearch) *) fixed UP and DOWN functions for UTF-8 strings (BibConvert) Invenio v1.0.0-rc0 -- released 2010-12-21 ----------------------------------------- *) CDS Invenio becomes Invenio as of this release *) new facility of hosted collections; support for external records in search collections, user alerts and baskets (WebSearch, WebAlert, WebBasket) *) support for nested parentheses in search query syntax (WebSearch) *) new refersto/citedby search operators for second-order searches in citation map (BibRank, WebSearch) *) numerous improvements to SPIRES query syntax parser (WebSearch) *) enhancement to search results summaries, e.g. co-author lists on author pages, e.g. h-index (WebSearch) *) new support for unAPI, Zotero, OpenSearch, AWS (WebSearch) *) new phrase and word-pair indexes (BibIndex) *) new fuzzy author name matching mode (BibIndex) *) new time-dependent citation ranking family of methods (BibRank) *) full-text search now shows context snippets (BibFormat) *) improvements to the basket UI, basket export facility (WebBasket) *) new support for FCKeditor in submissions and user comments, possibility to attach files (WebComment, WebSubmit) *) commenting facility enhanced with rounds and threads (WebComment) *) new facility to moderate user comments (WebComment) *) enhanced CLI tool for document file management bringing new options such as hidden file flag (WebSubmit) *) numerous improvements to the submission system, e.g. 
asynchronous JavaScript upload support, derived document formats, icon creation, support for automatic conversion of OpenOffice documents, PDF/A, OCR (WebSubmit) *) new full-text file metadata reader/writer tool (WebSubmit) *) new experimental SWORD protocol client application (BibSword) *) complete rewrite of the record editor using Ajax technology for faster user operation, with new features such as field templates, cloning, copy/paste, undo/redo, auto-completion, etc (BibEdit) *) new multi-record editor to alter many records in one go (BibEdit) *) new Ajax-based record differ and merger (BibMerge) *) new fuzzy record matching mode, with possibility to match records against remote Invenio installations (BibMatch) *) new circulation and holdings module (BibCirculation) *) new facility for matching provenance information when uploading records (BibUpload) *) new possibility of uploading incoming changes into holding pen (BibUpload) *) new batch uploader facility to support uploading of metadata files and of full-text files either in CLI or over web (BibUpload) *) new record exporting module supporting e.g. Sitemap and Google Scholar export methods (BibExport) *) improvements to the keyword classifier, e.g. author and core keywords (BibClassify) *) new facility for external robot-like login method (WebAccess) *) numerous improvements to the journal creation facility, new `Atlantis Times' demo journal (WebJournal) *) refactored and improved OAI exporter and harvester (BibHarvest) *) new taxonomy-based and dynamic-query knowledge base types (BibKnowledge) *) possibility to switch on/off user features such as alerts and baskets based on RBAC rules (WebAccess and other modules) *) various improvements to task scheduler, for example better communication with tasks, possibility to run certain bibsched tasks within given time limit, etc (BibSched) *) new database dumper for backup purposes (MiscUtil) *) new plotextractor library for extracting plots from compuscripts, new figure caption index and the Plots tab (MiscUtil, BibIndex, WebSearch) *) enhanced reference extractor, e.g. support for DOI, for author name recognition (MiscUtil) *) new register emergency feature e.g.
to alert admins by SMS in case the task queue stops (MiscUtil) *) infrastructure move from mod_python to mod_wsgi, support for mod_xsendfile (WebStyle and many modules) *) infrastructure move from jsMath to MathJax (MiscUtil) *) some notable backward-incompatible changes: removed authentication methods related to Apache user and group files, changed BibFormat element's API (BibFormat, many modules) *) new translations (Afrikaans, Galician, Georgian, Romanian, Kinyarwanda) plus many translation updates *) other numerous improvements and bug fixes done in about 1600 commits over Invenio v0.99 series CDS Invenio v0.99.3 -- released 2010-12-13 ------------------------------------------ *) fixed issues in the harvesting daemon when harvesting from more than one OAI repository (BibHarvest) *) fixed failure in formatting engine when dealing with not-yet-existing records (BibFormat) *) fixed traversal of final URL parts in the URL dispatcher (WebStyle) *) improved bibdocfile URL recognition upon upload of MARC files (BibUpload) *) fixed bug in admin interface for adding authorizations (WebAccess) *) keyword extractor is now compatible with rdflib releases older than 2.3.2 (BibClassify) *) output of `bibsched status' now shows the queue mode status as AUTOMATIC or MANUAL to help queue monitoring (BibSched) CDS Invenio v0.99.2 -- released 2010-10-20 ------------------------------------------ *) stricter checking of access to restricted records: in order to view a restricted record, users are now required to have authorizations to access all restricted collections the given record may belong to (WebSearch) *) strict checking of user query history when setting up email notification alert, preventing URL mangling attempts (WebAlert) *) fixed possible Unix signal conflicts for tasks performing I/O operations or running external processes, relevant notably to full-text indexing of remote files (BibSched) *) fixed full-text indexing and improved handling of files of `unexpected' extensions (BibIndex, WebSubmit) *) streaming of files of `unknown' MIME type now defaults to application/octet-stream (WebSubmit) *) fixed addition of new MARC fields in the record editor (BibEdit) *) fixed issues in full-text file attachment via MARC (BibUpload) *) fixed authaction CLI client (WebAccess) *) ... 
plus other minor fixes and improvements CDS Invenio v0.99.1 -- released 2008-07-10 ------------------------------------------ *) search engine syntax now supports parentheses (WebSearch) *) search engine syntax now supports SPIRES query language (WebSearch) *) strict respect for per-collection sort options on the search results pages (WebSearch) *) improved parsing of search query with respect to non-existing field terms (WebSearch) *) fixed "any collection" switch on the search results page (WebSearch) *) added possibility for progressive display of detailed record page tabs (WebSearch) *) added support for multi-page RSS output (WebSearch) *) new search engine summarizer module with the cite summary output format (WebSearch, BibRank) *) "cited by" links are now generated only when needed (WebSearch) *) new experimental comprehensive author page (WebSearch) *) stemming for many indexes is now enabled by default (BibIndex) *) new intelligent journal index (BibIndex) *) new logging of missing citations (BibRank) *) citation indexer and searcher improvements and caching (BibRank) *) new low-level task submission facility (BibSched) *) new options in bibsched task monitor: view task options, log and error files; prune task to a history table; extended status reporting; failed tasks now need acknowledgement in order to restart the queue (BibSched) *) safer handling of task sleeping and waking up (BibSched) *) new experimental support for task priorities and concurrent task execution (BibSched) *) improved user-configured browser language matching (MiscUtil) *) new default behaviour not differentiating between guest users; this removes a need to keep sessions/uids for guests and robots (WebSession) *) optimized sessions and collecting external user information (WebSession) *) improved logging conflicts for external vs internal users (WebAccess) *) improved Single Sign-On session preservation (WebAccess) *) new 'become user' debugging facility for admins (WebAccess) *) new bibdocfile CLI tool to manipulate full-text files archive (WebSubmit) *) optimized redirection of old URLs (WebSubmit) *) new icon creation tool in the submission input chain (WebSubmit) *) improved full-text file migration tool (WebSubmit) *) improved stamping of full-text files (WebSubmit) *) new approval-related end-submission functions (WebSubmit) *) comments and descriptions of full-text files are now kept also in bibdoc tables, not only in MARC; they are synchronized during bibupload (WebSubmit, BibUpload) *) fixed navigation in public baskets (WebBasket) *) added detailed record page link to basket records (WebBasket) *) new removal of HTML markup in alert notification emails (WebAlert) *) improved OAI harvester logging and handling (BibHarvest) *) improved error checking (BibConvert) *) improvements to the record editing tool: subfield order change, repetitive subfields; improved record locking features; configurable per-collection curators (BibEdit) *) fully refactored WebJournal module (WebJournal) *) new RefWorks output format, thanks to Theodoros Theodoropoulos (BibFormat) *) fixed keyword detection tool's output; deactivated taxonomy compilation (BibClassify) *) new /stats URL for administrators (WebStat) *) better filtering of unused translations (WebStyle) *) updated French, Italian, Norwegian and Swedish translations; updated Japanese translation (thanks to Makiko Matsumoto and Takao Ishigaki); updated Greek translation (thanks to Theodoros Theodoropoulos); new Hungarian translation (thanks to Eva Papp) *) ... 
plus many other minor bug fixes and improvements CDS Invenio v0.99.0 -- released 2008-03-27 ------------------------------------------ *) new Invenio configuration language, new inveniocfg configuration tool permitting more runtime changes and enabling separate local customizations (MiscUtil) *) phased out WML dependency everywhere (all modules) *) new common RSS cache implementation (WebSearch) *) improved access control to the detailed record pages (WebSearch) *) when searching non-existing collections, do not revert to searching in public Home anymore (WebSearch) *) strict calculation of number of hits per multiple collections (WebSearch) *) propagate properly language environment in browse pages, thanks to Ferran Jorba (WebSearch) *) search results sorting made accentless, thanks to Ferran Jorba (WebSearch) *) new OpenURL interface (WebSearch) *) added new search engine API argument to limit searches to record creation/modification dates and times instead of hitherto creation dates only (WebSearch) *) do not allow HTTP POST method for searches to prevent hidden mining (WebSearch) *) added alert and RSS teaser for search engine queries (WebSearch) *) new optimized index structure for fast integer bit vector operations, leading to significant indexing time improvements (MiscUtil, BibIndex, WebSearch) *) new tab-based organisation of detailed record pages, with new URL schema (/record/1/usage) and related CSS changes (BibFormat, MiscUtil, WebComment, WebSearch, WebStyle, WebSubmit) *) phased out old PHP based code; migration to Python-based output formats recommended (BibFormat, WebSubmit) *) new configurability to show/hide specific output formats for specific collections (BibFormat, WebSearch) *) new configurability to have specific stemming settings for specific indexes (BibIndex, WebSearch) *) optional removal of LaTeX markup for indexer (BibIndex, WebSearch) *) performance optimization for webcoll and optional arguments to refresh only parts of collection cache (WebSearch) *) optional verbosity argument propagation to the output formatter (BibFormat, WebSearch) *) new convenient reindex option to the indexer (BibIndex) *) fixed problem with indexing of some lengthy UTF-8 accented names, thanks to Theodoros Theodoropoulos for reporting the problem (BibIndex) *) fixed full-text indexing of HTML pages (BibIndex) *) new Stemmer module dependency, fixes issues on 64-bit systems (BibIndex) *) fixed download history graph display (BibRank) *) improved citation ranking and history graphs, introduced self-citation distinction, added new demo records (BibRank) *) fixed range redefinition and output message printing problems in the ranking indexer, thanks to Mike Marino (BibRank) *) new XSLT output formatter support; phased out old BFX formats (BibFormat) *) I18N output messages are now translated in the output formatter templates (BibFormat) *) formats fixed to allow multiple author affiliations (BibFormat) *) improved speed of the record output reformatter in case of large sets (BibFormat) *) support for displaying LaTeX formulas via JavaScript (BibFormat) *) new and improved output formatter elements (BibFormat) *) new escaping modes for format elements (BibFormat) *) output format template editor cache and element dependency checker improvements (BibFormat) *) output formatter speed improvements in PHP-compatible mode (BibFormat) *) new demo submission configuration and approval workflow examples (WebSubmit) *) new submission full-text file stamper utility (WebSubmit) *) new submission 
icon-creation utility (WebSubmit) *) separated submission engine and database layer (WebSubmit) *) submission functions can now access user information (WebSubmit) *) implemented support for restricted icons (WebSubmit, WebAccess) *) new full-text file URL and cleaner storage facility; requires file names to be unique within a given record (WebSearch, WebSubmit) *) experimental release of the complex approval and refereeing workflow (WebSubmit) *) new end-submission functions to move files to storage space (WebSubmit) *) added support for MD5 checking of full-text files (WebSubmit) *) improved behaviour of the submission system with respect to the browser "back" button (WebSubmit) *) removed support for submission "cookies" (WebSubmit) *) flexible report number generation during submission (WebSubmit) *) added support for optional filtering step in the OAI harvesting chain (BibHarvest) *) new text-oriented converter functions IFDEFP, JOINMULTILINES (BibConvert) *) selective harvesting improvements, sets, non-standard responses, safer resumption token handling (BibHarvest) *) OAI archive configuration improvements: collections retrieval, multiple set definitions, new clean mode, timezones, and more (BibHarvest) *) OAI gateway improvements: XSLT used to produce configurable output (BibHarvest) *) added support for "strong tags" that can resist metadata replace mode (BibUpload) *) added external OAI ID tag support to the uploader (BibUpload) *) added support for full-text file transfer during uploading (BibUpload) *) preserving full history of all MARCXML versions of a record (BibEdit, BibUpload) *) XMLMARC to TextMarc improvements: empty indicators and more (BibEdit) *) numerous reference extraction tool improvements: year handling, LaTeX handling, URLs, journal titles, output methods, and more (BibEdit) *) new classification daemon (BibClassify) *) classification taxonomy caching resulting in speed optimization (BibClassify) *) new possibility to define more than one keyword taxonomy per collection (BibClassify) *) fixed non-standalone keyword detection, thanks to Annette Holtkamp (BibClassify) *) new embedded page generation profiler (WebStyle) *) new /help pages layout and webdoc formatting tool (WebStyle) *) new custom style template verification tool (WebStyle) *) added support for the XML page() output format, suitable for AJAX interfaces (WebStyle) *) introduction of navigation menus (WebStyle) *) general move from HTML to XHTML markup (all modules) *) fixed alert deletion tool vulnerability (WebAlert) *) do not advertise baskets/alerts much for guest users; show only the login link (WebSession) *) password reset interface improvements (WebSession) *) new permanent "remember login" mechanism (WebSession, WebAccess) *) local user passwords are now encrypted (WebSession, WebAccess) *) new LDAP external authentication plugin (WebAccess) *) new password reset mechanism using new secure mail cookies and temporary role membership facilities (WebAccess, WebSession) *) added support for Single Sign-On Shibboleth based authentication method (WebAccess) *) new firewall-like based role definition language, new demo examples (WebAccess) *) external authentication and groups improvements: nicknames, account switching, and more (WebSession, WebAccess) *) task log viewer integrated in the task monitor (BibSched) *) new journal creation module (WebJournal) *) new generic statistic gathering and display facility (WebStat) *) deployed new common email sending facility (MiscUtil, WebAlert, WebComment, 
WebSession, WebSubmit) *) dropped support for MySQL-4.0, permitting to use clean and strict UTF-8 storage methods; upgrade of MySQLdb to at least 1.2.1_p2 required (MiscUtil) *) uncaught exceptions are now being sent by email to the administrator (MiscUtil, WebStyle) *) new general garbage collector with a possibility to run via the task scheduler and a possibility to clean unreferenced bibliographic values (MiscUtil) *) new generic SQL and data cacher (MiscUtil) *) new HTML page validator plugin (MiscUtil) *) new web test suite running in a real browser (MiscUtil) *) improved code kwalitee checker (MiscUtil) *) translation updates: Spanish and Catalan (thanks to Ferran Jorba), Japanese (Toru Tsuboyama), German (Benedikt Koeppel), Polish (Zbigniew Szklarz and Zbigniew Leonowicz), Greek (Theodoros Theodoropoulos), Russian (Yana Osborne), Swedish, Italian, French *) new translations: Chinese traditional and Chinese simplified (thanks to Kam-ming Ku) *) ... plus many other minor bug fixes and improvements CDS Invenio v0.92.1 -- released 2007-02-20 ------------------------------------------ *) new support for external authentication systems (WebSession, WebAccess) *) new support for external user groups (WebSession) *) new experimental version of the reference extraction program (BibEdit) *) new optional Greek stopwords list, thanks to Theodoros Theodoropoulos (BibIndex) *) new Get_Recid submission function (WebSubmit) *) new config variable governing the display of the download history graph (BibRank) *) started deployment of user preferences (WebSession, WebSearch) *) split presentation style for "Narrow search", "Focus on" and "Search also" search interface boxes (WebSearch, WebStyle) *) updated CERN Indico and KEK external collection searching facility (WebSearch) *) fixed search interface portalbox and collection definition escaping behaviour (WebSearch Admin) *) fixed problems with external system number and OAI ID matching (BibUpload) *) fixed problem with case matching behaviour (BibUpload) *) fixed problems with basket record display and basket topic change (WebBasket) *) fixed output format template attribution behaviour (BibFormat) *) improved language context propagation in output formats (BibFormat) *) improved output format treatment of HTML-aware fields (BibFormat) *) improved BibFormat migration kit (BibFormat) *) improved speed and eliminated set duplication of the OAI repository gateway (BibHarvest) *) fixed resumption token handling (BibHarvest) *) improved record editing interface (BibEdit) *) fixed problem with empty fields treatment (BibConvert) *) updated Report_Number_Generation submission function to be able to easily generate report numbers from any submission information (WebSubmit) *) fixed problem with submission field value escaping (WebSubmit) *) fixed problem with submission collection ordering (WebSubmit) *) fixed BibSched task signal handling inconsistency (BibSched) *) fixed TEXT versus BLOB database problems for some tables/columns *) minor updates to the HOWTO Migrate guide and several admin guides (WebHelp, BibIndex, BibFormat) *) minor bugfixes to several modules; see ChangeLog for details and credits CDS Invenio v0.92.0 -- released 2006-12-22 ------------------------------------------ *) previously experimental output formatter in Python improved and made default (BibFormat) *) previously experimental new submission admin interface in Python improved and made default (WebSubmit) *) new XML-oriented output formatting mode (BibFormat) *) new
export-oriented output formats: EndNote, NLM (BibFormat) *) RSS 2.0 latest additions feed service (WebSearch, BibFormat) *) new XML-oriented metadata converter mode (BibConvert) *) new metadata uploader in Python (BibUpload) *) new integrated parallel external collection searching (WebSearch) *) improved document classifier: composite keywords, wildcards, cloud output (BibClassify) *) improved UTF-8 fulltext indexing (BibIndex) *) improved external login authentication subsystem (WebAccess) *) added possibility to order submission categories (WebSubmit) *) improved handling of cached search interface page formats, preferential sort pattern functionality, international collection names (WebSearch) *) improved behaviour of OAI harvester: sets, deleted records, harvested metadata transformation (BibHarvest) *) improved MARCXML schema compatibility concerning indicators; updates to the HTML MARC output format (BibEdit, BibUpload, BibFormat, and other modules) *) multiple minor bugs fixed thanks to the wider deployment of the regression test suite (all modules) *) new translation (Croatian) and several translation updates (Catalan, Bulgarian, French, Greek, Spanish); thanks to Ferran Jorba, Beatriu Piera, Alen Vodopijevec, Jasna Marković, Theodoros Theodoropoulos, and Nikolay Dyankov (see also THANKS file) *) removed dependency on PHP; not needed anymore *) full compatibility with MySQL 4.1 and 5.0; upgrade from MySQL 4.0 now recommended *) full compatibility with FreeBSD and Mac OS X CDS Invenio v0.90.1 -- released 2006-07-23 ------------------------------------------ *) output messages improved and enhanced to become more easily translatable in various languages (all modules) *) new translation (Bulgarian) and several updated translations (Greek, French, Russian, Slovak) *) respect language choice in various web application links (WebAlert, WebBasket, WebComment, WebSession, WebSubmit) *) fixed problem with commenting rights in a group-shared basket that is also a public basket with lesser rights (WebBasket) *) guest users are now forbidden to share baskets (WebBasket) *) fixed guest user garbage collection, adapted to the new baskets schema (WebSession) *) added possibility to reject group membership requests; sending informational messages when users are approved/refused by group administrators (WebSession) *) experimental release of the new BibFormat in Python (BibFormat) *) started massive deployment of the regression test suite, checking availability of all web interface pages (BibEdit, BibFormat, BibHarvest, BibIndex, BibRank, MiscUtil, WebAccess, WebBasket, WebComment, WebMessage, WebSearch, WebSession, WebSubmit) *) updated developer documentation (I18N output messages policy, test suite policy, coding style) CDS Invenio v0.90.0 -- released 2006-06-30 ------------------------------------------ *) formerly known as CDSware; the application name change clarifies the relationship with respect to the CDS Software Consortium producing two flagship applications (CDS Indico and Invenio) *) version number increased to v0.90 in anticipation of the forthcoming v1.0 release, now that all the major codebase changes are over *) new possibility to define user groups (WebGroup) *) new personal basket organization in topics (WebBasket) *) new basket sharing among user groups (WebBasket) *) new open peer reviewing and commenting on documents (WebComment) *) new user and group web messaging system (WebMessage) *) new ontology-based document classification system (BibClassify) *) new WebSubmit Admin
(WebSubmit) *) new record editing web interface (BibEdit) *) new record matching tool (BibMatch) *) new OAI repository administration tool (BibHarvest) *) new OAI periodical harvesting tool (BibHarvest) *) new web layout templating system (WebStyle) *) new clean URL schema (e.g. /collection/Theses, /record/1234) (WebStyle) *) new BibTeX output format support (BibFormat) *) new possibility of secure HTTPS authentication while keeping the rest of the site non-HTTPS (WebSession) *) new centralized error library (MiscUtil) *) new gettext-based international translations, with two new beta translations (Japanese, Polish) *) new regression testing suite framework (MiscUtil) *) new all prerequisites are now apt-gettable for Debian "Sarge" GNU/Linux *) new full support for Mac OS X *) ... plus many fixes and changes worth one year of development CDSware v0.7.1 -- released 2005-05-04 ------------------------------------- *) important bugfix for bibconvert's ``source data in a directory'' mode, as invoked by the web submission system (BibConvert) *) minor bugfix in the search engine, thanks to Frederic Gobry (WebSearch) *) minor bugfix in the WebSearch Admin interface (WebSearch) *) automatic linking to Google Print in the ``Haven't found what you were looking for...'' page box (WebSearch) *) BibFormat Admin Guide cleaned, thanks to Ferran Jorba *) new Catalan translation, thanks to Ferran Jorba *) updated Greek and Portuguese translations, thanks to Theodoros Theodoropoulos and Flávio C. Coelho *) updated Spanish translation CDSware v0.7.0 -- released 2005-04-06 ------------------------------------- *) experimental release of the refextract program for automatic reference extraction from PDF fulltext files (BibEdit) *) experimental release of the citation and download ranking tools (BibRank) *) new module for gathering usage statistics out of Apache log files (WebStat) *) new similar-records-navigation tool exploring end-user viewing habits: "people who viewed this page also viewed" (WebSearch, BibRank) *) OAI gateway validated against OAI Repository Explorer (BibHarvest) *) fixed "records modified since" option for the indexer (BibIndex) *) collection cache update is done only when the cache is not up to date (WebSearch) [closing #WebSearch-016] *) cleanup of user login mechanism (WebSession, WebAccess) *) fixed uploading of already-existing records in the insertion mode (BibUpload) *) fixed submission in UTF-8 languages (WebSubmit) *) updated HOWTO Run Your Existing CDSware Installation (WebHelp) *) test suite improvements (WebSearch, BibHarvest, BibRank, BibConvert) *) German translation updated and new German stopwords list added, thanks to Guido Pelzer *) new Greek and Ukrainian translations, thanks to Theodoros Theodoropoulos and Vasyl Ostrovskyi *) all language codes now comply to RFC 1766 and ISO 639 *) numerous other small fixes and improvements, with many contributions by the EPFL team headed by Frederic Gobry (BibConvert, BibUpload, WebSearch, WebSubmit, WebSession) CDSware v0.5.0 -- released 2004-12-17 ------------------------------------- *) new rank engine, featuring word similarity rank method and the journal impact factor rank demo (BibRank) *) search engine includes ranking option (WebSearch) *) record similarity search based on word frequency (WebSearch, BibRank) *) stopwords possibility when ranking and indexing (BibRank, BibIndex) *) stemming possibility when ranking and indexing (BibRank, BibIndex) *) search engine boolean query processing stages improved (WebSearch) *) search 
engine accent matching in phrase searches (WebSearch) *) regular expression searching mode introduced into the Simple Search interface too (WebSearch) *) Search Tips split into a brief Search Tips page and detailed Search Guide page (WebSearch) *) improvements to the ``Try your search on'' hints (WebSearch) *) author search hints introduced (WebSearch) *) search interface respects title prologue/epilogue portalboxes (WebSearch) *) improvements to admin interfaces (WebSearch, BibIndex, BibRank, WebAccess) *) basket item ordering problem fixed (WebBasket) *) access error messages introduced (WebAccess and its clients) *) new account management to enable/disable guest users and automatic vs to-be-approved account registration (WebAccess) *) possibility for temporary read-only access to, and closure of, the site; useful for backups (WebAccess and its clients) *) possibility for external authentication login methods (WebAccess) *) new XML MARC handling library (BibEdit) *) when uploading, bad XML records are marked as errors (BibUpload) *) improvements to the submission engine and its admin interface, thanks to Tiberiu Dondera (WebSubmit) *) preparations for electronic mail submission feature, not yet functional (ElmSubmit) *) added example on MARC usage at CERN (WebHelp) *) legacy compatibility with MySQL 3.23.x assured (BibUpload) *) legacy compatibility with Python 2.2 assured (WebSubmit) *) test suite additions and corrections (BibRank, BibIndex, WebSearch, BibEdit) *) French translation fixes, thanks to Eric Grand *) minor Czech and Slovak translation cleanup CDSware v0.3.3 (DEVELOPMENT) -- released 2004-07-16 --------------------------------------------------- *) new international phrases, collection and field names; thanks to Guido, Flavio, Tullio *) collection international names are now respected by the search engine and interfaces (WebSearch) *) field international names are now respected by the search engine and interfaces (WebSearch) *) when no hits found in a given collection, do not display all public hits straight away but only link to them (WebSearch) *) records marked as DELETED aren't shown anymore in XML MARC and other formats (WebSearch) *) detailed record page now features record creation and modification times (WebSearch) *) improved XML MARC parsing and cumulative record count in case of uploading of several files in one go (BibUpload) *) personal `your admin activities' page introduced (WebSession) *) added option to fulltext-index local files only (BibIndex) *) initial release of the BibIndex Admin interface (BibIndex) *) checking of mandatory selection box definitions (WebSubmit) *) WebSearch Admin interface cleanup (WebSearch) *) introducing common test suite infrastructure (WebSearch, BibIndex, MiscUtil, WebHelp) *) fixed accent and link problems for photo demo records (MiscUtil) *) conference title exported via OAI XML DC (BibHarvest) *) enabled building out of source directory; thanks to Frederic CDSware v0.3.2 (DEVELOPMENT) -- released 2004-05-12 --------------------------------------------------- *) admin area improved: all the modules have now Admin Guides; some guides were updated, some are still to be updated (WebHelp, BibConvert, BibFormat, BibIndex, BibSched, WebAlert, WebSession, WebSubmit, BibEdit, BibHarvest, BibRank, BibUpload, WebAccess, WebBasket, WebSearch, WebStyle) *) initial release of the WebSearch Admin interface (WebSearch) *) initial release of the BibRank Admin interface (BibRank) *) search cache expiry after insertion of new records 
(WebSearch) *) search engine now does on-the-fly formatting via BibFormat CLI call to handle restricted site situations (WebSearch) *) webcoll default verbosity decreased for efficiency (WebSearch) *) added BibConvert configuration example for converting XML Dublin Core to XML MARC (BibConvert) *) BibConvert knowledge base mode extended by various case-sensitive matching possibilities (BibConvert) *) fixed various problems with fulltext file names and the submission from MS Windows platform (WebSubmit) *) fixed problem with bibupload append mode not updating XML MARC properly (BibUpload) *) fixed small problems with the submission interface such as multiple fields selection (WebSubmit) *) session revoking and session expiry strengthened (WebSession) *) page design and style sheet updated to better fit large variety of browsers (WebStyle) *) added output format argument for basket display (WebBasket) *) new Swedish translation and updated German, Russian, and Spanish translations; thanks to Urban, Guido, Lyuba, and Magaly *) faster creation of I18N static HTML and PHP files during make CDSware v0.3.1 (DEVELOPMENT) -- released 2004-03-12 --------------------------------------------------- *) security fix preventing exposure of local configuration variables by malicious URL crafting (WebSearch, WebSubmit, WebAlert, WebBasket, WebSession, BibHarvest, MiscUtil) *) initial release of the ranking engine (BibRank) *) new guide on HOWTO Run Your CDSware Installation (WebHelp) *) fixed submit configurations with respect to fulltext links and metadata tags (WebSubmit, MiscUtil) *) Your Account personal corner now shows the list and the status of submissions and approvals (WebSession) *) uniform help and version number option for CLI executables (WebSearch, BibSched, BibIndex, BibRank, BibHarvest, BibConvert, WebAccess, BibFormat, WebSession, WebAlert) *) uniform technique for on-the-fly formatting of search results via `hb_' and `hd_' output format parameters (WebSearch) *) check for presence of pcntl and mysql PHP libraries (BibUpload) CDSware v0.3.0 (DEVELOPMENT) -- released 2004-03-05 --------------------------------------------------- *) new development branch release (important SQL table changes) *) introducing a new submission engine and the end-user web interface (WebSubmit) *) bibupload is now a BibSched task with new options (BibUpload) *) BibWords renamed into BibIndex in the view of future phrase indexing changes (BibIndex) *) more secure DB server connectivity (BibSched) *) record matching functionality (BibConvert) *) character encoding conversion tables (BibConvert) *) Qualified Dublin Core conversion example (BibConvert) *) OAI deleted records policy can now be specified (BibHarvest) *) multi-language collection portalboxes (WebSearch) *) HTML pages now respect language selections (WebSearch, WebHelp) *) minor layout changes (WebStyle) *) updated Russian and other translations *) ChangeLog is now generated from CVS log messages *) plus the usual set of bugfixes (see ChangeLog) CDSware v0.1.2 (DEVELOPMENT) -- released 2003-12-21 --------------------------------------------------- *) development branch release *) fix BibReformat task launching problem (BibFormat) *) fix BibTeX -> XML MARC conversion example (BibConvert) *) updated Spanish translation CDSware v0.1.1 (DEVELOPMENT) -- released 2003-12-19 --------------------------------------------------- *) development branch release *) access control engine now used by BibWords, BibFormat (admin and bibreformat), WebSearch (webcoll), and 
BibTaskEx *) access control engine admin guide started (WebAccess) *) search engine support for sorting by more than one field (WebSearch) *) more internationalization of the search engine messages (WebSearch) *) new language: Norwegian (bokmål) *) simple example for converting BibTeX into XML MARC (BibConvert) *) new optional --with-python configuration option *) Python module detection during configure *) bugfixes: os.tempnam() warning, login page referer, and others CDSware v0.1.0 (DEVELOPMENT) -- released 2003-12-04 --------------------------------------------------- *) development branch release *) search engine redesign to yield five times more search performance for larger sites (WebSearch, BibWords) *) fulltext indexation of PDF, PostScript, MS Word, MS PowerPoint and MS Excel files (WebSearch) *) integrated combined metadata/fulltext/citation search (WebSearch) *) multi-stage search guidance in cases of no exact match (WebSearch) *) OAI-PMH harvester (BibHarvest) *) bibliographic task scheduler (BibSched) *) automatic daemon mode of the indexer, the formatter and the collection cache generator (BibWords, BibFormat, WebSearch) *) user management and session handling rewrite (WebSession) *) user personalization, document baskets and notification alert system (WebBasket, WebAlert) *) role-based access control engine (WebAccess) *) internationalization of the interface started (currently with Czech, German, English, Spanish, French, Italian, Portuguese, Russian, and Slovak support) *) web page design update (WebStyle) *) introduction of programmer-oriented technical documentation corner (WebHelp) *) source tree reorganization, mod_python technology adopted for most of the modules CDSware v0.0.9 (STABLE) -- released 2002-08-01 ---------------------------------------------- *) first "public" alpha release of CDSware *) recently standardized Library of Congress' MARC XML schema adopted in all CDSware modules as the new default internal XML file format (BibConvert, BibFormat, BibUpload, WebSubmit, WebSearch) *) support for OAI-PMH v2.0 in addition to OAI-PMH v1.1 (WebSearch) *) search interface now honors multiple output formats per collection (BibFormat, WebSearch) *) search interface now honors search fields, search options, and sort options from the database config tables (WebSearch, WebSearch Admin) *) search interface now honors words indexes from the database config tables (BibWords, WebSearch) *) easy reformatting of already uploaded bibliographic records via web admin. tool (BibFormat Admin/Reformat Records) *) new submission form field type ("response") allowing greater flexibility (WebSubmit) [thanks to Frank Sudholt] *) demo site "Atlantis Institute of Science" updated to demonstrate: Pictures collection of photographs; specific per-collection formats; references inside Articles and Preprints; "cited by" search link; published version linking; subject category searching; search within, search options, sort options in the web collection pages. - end of file - diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 518c8f7ad..117b3569e 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -1,126 +1,165 @@ -------------------------------------------------------------------- -Invenio v1.1.1 is released -December 21, 2012 +Invenio v1.1.2 is released +August 19, 2013 http://invenio-software.org/ -------------------------------------------------------------------- -Invenio v1.1.1 was released on December 21, 2012. +Invenio v1.1.2 was released on August 19, 2013.
This stable release contains a number of minor fixes and improvements. -It is recommended to all Invenio sites using v1.1.0 or previous stable +It is recommended to all Invenio sites using v1.1.1 or previous stable release series (v0.99, v1.0). What's new: ----------- - *) BatchUploader: error reporting improvements + *) BibAuthorID: fix in name comparisons (#1313 #1314); improvements + and fixes; improvements, fixes and optimizations; UI and backend + improvements - *) BibAuthorID: arXiv login upgrade; fix for small bug in claim - interface + *) BibCatalog: removal of print statement (#1337) - *) BibConvert: fix bug with SPLITW function; target/source CLI flag - description fix + *) BibClassify: escape keywords in tag cloud and MARCXML - *) BibDocFile: better error report for unknown format; explicit - redirection to secure URL; fix for file upload in submissions + *) BibDocFile: better JS washing in web UI; display file upload + progress (#1020 #1021); display "Restricted" label correctly + (#1299); fix check-md5 with bibdocfsinfo cache (#1249); fix + check-md5 with bibdocfsinfo cache (#1249); fix error in calling + register_download (#1311); handling of exceptions in Md5Folder + (#1060); revert md5 property patch (#1249); support new magic + library (#1207) - *) BibEdit: 'bibedit' CSS class addition to page body + *) BibEncode: minor fix in process_batch_job() - *) BibFormat: clean Default_HTML_meta template; fix for js_quicktags - location; ISBN tag update for meta format; "ln" parameter in - bfe_record_url output; meta header output fix; relator code filter - in bfe_authors; fix for reformatting by record IDs + *) BibFormat: additional fulltext file display in HB (#1219); checks + for bibformat bin; fix CLI call to old PHP-based formatter; fixes + unit tests (#1320); fix for fulltext file format; fix snippets for + phrase queries (#1201); format_element initialisation fix; passing + of user_info for Excel format; replacement of CDS Invenio by + Invenio; setUp/tearDown in unit tests (#1319); skip hidden icons + in OpenGraph image tag - *) errorlib: register_exception improvements + *) BibIndex: better wording for stemming in admin UI; replacement of + CDS Invenio by Invenio; synonym indexing speed up (#1484); use + human friendly index name (#1329) - *) global: login link using absolute URL redirection + *) BibKnowledge: /kb/export 500 error fix; optional memoisation of + KBR lookups (#1484) - *) installation: aidUSERINPUTLOG consistency upgrade; bigger - hstRECORD.marcxml size; fix for wrong name in tabcreate; inclusion - of JS quicktags in tarball; mark upgrade recipes as applied; - rephrase 1.1 upgrade recipe warning; safer upgrader bibsched - status parse; strip spaces in CFG list values + *) BibMerge: delete cache file on submit - *) jQuery: tablesorter location standardisation + *) BibSched: bibupload max_priority check; bugfix for high-priority + monotasks; increases size of monitor columns; + parse_runtime_limit() fix (#1432); parse_runtime_limit() tests fix + (#1432) - *) mailutils: authentication and TLS support + *) BibUpload: FMT regression test case fix (#1152); indicators in + strong tags (#939) - *) OAIRepository: Edit OAI Set page bug fix; fix for OAI set editing; - print_record() fixes + *) CKEditor: updated to version 3.6.6 - *) plotextractor: washing of captions and context + *) dateutils: strftime improvement (#1065); strptime for Python-2.4 + compatibility - *) pluginutils: fix for failing bibformat test case + *) errorlib: hiding bibcatalog info in exception body 
- *) solrutils: addition of files into release tarball + *) global: test suite nosification - *) WebAccess: admin interface usability improvement; guest unit tests - for firerole + *) htmlutils: fix single quote escaping; improve js string escaping; + MathJax 2.1 (#1050) - *) WebAlert: new regression tests for alerts + *) I18N: updates to Catalan and Spanish translations - *) WebComment: cleaner handling of non-reply comments + *) installation: fix collectiondetailedrecordpagetabs (#1496); fix + for jQuery hotkeys add-on URL (#1507); fix for MathJax OS X + install issue (#1455); support for Apache-2.4 (#1552) - *) WebJournal: better language handling in widgets; CERN-specific - translation; explicit RSS icon dimensions; fix for - CFG_TMPSHAREDDIR; fix for retrieval of deleted articles; search - select form by name + *) inveniocfg: tests runner file closure fix (#1327) - *) WebSearch: fix for webcoll grid layout markup; - get_all_field_values() typo; next-hit/previous-hit numbering fix; - respect output format content-type; washing of 'as' argument + *) InvenioConnector: fix for CDS authentication; mechanize dependency - *) WebSession: fix for login-with-referer issue; fix for - merge_usera_into_userb() + *) inveniogc: consider journal cache subdirs - *) WebStyle: dumb page loading fix Google Analytics documentation - update; memory leak fix in session handling; new /ping handler; - removal of excess language box call; req.is_https() fix; + *) memoiseutils: initial release - *) WebSubmit: display login link on /submit page; fix for - Send_APP_Mail function; fix the approval URL for publiline + *) OAIHarvest: fix path for temporary authorlists; holding-pen UI + bugfixes (#1401) - *) WebUser: fix for referer URL protocol + *) OAIRepository: CFG_OAI_REPOSITORY_MARCXML_SIZE; no bibupload -n + + *) RefExtract: replacement of CDS Invenio by Invenio + + *) WebAccess: fix variable parsing in robot auth (#1456); IP-based + rules and offline user fix (#1233); replacement of CDS Invenio by + Invenio + + *) WebApiKey: renames unit tests to regression tests (#1324) + + *) WebAuthorProfile: fix XSS vulnerability + + *) WebComment: escape review "title"; escape review "title" + + *) WebSearch: 410 HTTP code for deleted records; advanced search + notification if no hits; better cleaning of word patterns; fix + infinite synonym lookup cases (#804); handles "find feb 12" + (#948); nicer browsing of fuzzy indexes (#1348); respect default + `rg` in Advanced Search; SPIRES date math search fixes (#431 + #948); SPIRES invalid date search fix (#1467); tweaks SPIRES + two-digit search; unit test disabling for CFG_CERN_SITE; unit test + update (#1326) + + *) WebSession: fix for list of admin activities (#1444); login_method + changes; unit vs regression test suite cleanup + + *) WebStat: use CFG_JOURNAL_TAG instead of 773/909C4 (#546) + + *) WebSubmit: new websubmitadmin CLI (#1334); replacement of CDS + Invenio by Invenio; use PyPDF2 if available Download: --------- - - - + + + Installation notes: ------------------- Please follow the INSTALL file bundled in the distribution tarball. Upgrade notes: -------------- Please proceed as follows: a) Stop your bibsched queue and your Apache server.
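      For example, assuming the default /opt/invenio installation
      prefix and a Debian-style Apache service (adapt these commands
      to your local setup):

      $ sudo -u www-data /opt/invenio/bin/bibsched stop
      $ sudo /etc/init.d/apache2 stop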
b) Install the update: - $ tar xvfz invenio-1.1.1.tar.gz - $ cd invenio-1.1.1 + $ tar xvfz invenio-1.1.2.tar.gz + $ cd invenio-1.1.2 $ sudo rsync -a /opt/invenio/etc/ /opt/invenio/etc.OLD/ $ sh /opt/invenio/etc/build/config.nice $ make $ make check-upgrade $ sudo -u www-data make install $ sudo rsync -a /opt/invenio/etc.OLD/ \ - --exclude invenio.conf \ - --exclude bibformat/format_templates/Default_HTML_meta.bft \ + --exclude invenio-autotools.conf \ + --exclude bibformat/format_templates/Default_HTML_brief.bft \ /opt/invenio/etc/ # Note: if you are upgrading from previous stable release # series (v0.99 or v1.0), please don't rsync but diff, in order # to inspect changes and adapt your old configuration to the # new v1.1 release series. $ sudo -u www-data /opt/invenio/bin/inveniocfg --update-all $ sudo -u www-data /opt/invenio/bin/inveniocfg --upgrade - c) Restart your Apache server and your bibsched queue. + c) Update your MathJax and CKeditor plugins: + + $ sudo -u www-data make install-mathjax-plugin + $ sudo -u www-data make install-ckeditor-plugin + + d) Restart your Apache server and your bibsched queue. - end of file - \ No newline at end of file diff --git a/modules/webjournal/lib/webjournal_regression_tests.py b/modules/webjournal/lib/webjournal_regression_tests.py index 0455207fb..a3ec223bc 100644 --- a/modules/webjournal/lib/webjournal_regression_tests.py +++ b/modules/webjournal/lib/webjournal_regression_tests.py @@ -1,416 +1,416 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""WebJournal Regression Test Suite.""" __revision__ = "$Id$" import datetime import unittest import urllib from invenio.search_engine import record_public_p from invenio import webjournal_utils as wju from invenio.config import CFG_SITE_URL, \ CFG_SITE_LANG, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_PREFIX, \ CFG_DEVEL_SITE from invenio.testutils import make_test_suite, run_test_suite, \ test_web_page_content, merge_error_messages class ArticlesRelated(unittest.TestCase): """Functions about articles""" def test_is_new_article(self): """webjournal - checks if an article is new or not """ article = wju.is_new_article('AtlantisTimes', '03/2009', 99) self.assertEqual(article, False) article = wju.is_new_article('AtlantisTimes', '03/2009', 103) self.assertEqual(article, True) class CategoriesRelated(unittest.TestCase): """Functions about journal categories""" def test_get_journal_categories(self): """webjournal - returns all categories for a given issue""" journal1 = wju.get_journal_categories('AtlantisTimes', '03/2009') self.assertEqual(journal1[0], 'News') self.assertEqual(journal1[1], 'Science') journal2 = wju.get_journal_categories('AtlantisTimes', ) self.assertEqual(journal2[0], 'News') self.assertEqual(journal2[1], 'Science') self.assertEqual(journal2[2], 'Arts') def test_get_category_query(self): """webjournal - returns the category definition """ self.assertEqual(wju.get_category_query('AtlantisTimes', 'News'), '980__a:ATLANTISTIMESNEWS or 980__a:ATLANTISTIMESNEWSDRAFT') self.assertEqual(wju.get_category_query('AtlantisTimes', 'Science'), '980__a:ATLANTISTIMESSCIENCE or 980__a:ATLANTISTIMESSCIENCEDRAFT') class JournalConfigVars(unittest.TestCase): """Functions to get journal variables """ def test_get_xml_from_config(self): """webjournal - returns values from the journal configuration file """ value = wju.get_xml_from_config(["submission/doctype"], 'AtlantisTimes') self.assertEqual(value.values()[0], ['DEMOJRN']) self.assertEqual(value.keys(), ['submission/doctype']) value = wju.get_xml_from_config(["submission/identifier_element"], 'AtlantisTimes') self.assertEqual(value.values()[0], ['DEMOJRN_RN']) self.assertEqual(value.keys(), ['submission/identifier_element']) value = wju.get_xml_from_config(["draft_image_access_policy"], 'AtlantisTimes') self.assertEqual(value.values()[0], ['allow']) def test_get_journal_issue_field(self): """webjournal - returns the MARC field """ value = wju.get_journal_issue_field('AtlantisTimes') self.assertEqual(value, '773__n') def test_get_journal_css_url(self): """webjournal - returns URL to this journal's CSS """ self.assertEqual(wju.get_journal_css_url('AtlantisTimes', type='screen'), CFG_SITE_URL + '/img/AtlantisTimes.css') def test_get_journal_submission_params(self): """webjournal - returns params for the submission of articles """ submissions = wju.get_journal_submission_params('AtlantisTimes') self.assertEqual(submissions[0], 'DEMOJRN') self.assertEqual(submissions[1], 'DEMOJRN_RN') self.assertEqual(submissions[2], '037__a') def test_get_journal_draft_keyword_to_remove(self): """webjournal - returns the keyword to removed in order to move the article from Draft to Ready """ self.assertEqual(wju.get_journal_draft_keyword_to_remove('AtlantisTimes'), 'DRAFT') def test_get_journal_alert_sender_email(self): """webjournal - returns the email address used to send of the alert email. 
""" self.assertEqual(wju.get_journal_alert_sender_email('AtlantisTimes'), CFG_SITE_SUPPORT_EMAIL) def test_get_journal_alert_recipient_email(self): """webjournal - returns the default email address of the recipients of the email""" if CFG_DEVEL_SITE: self.assertEqual(wju.get_journal_alert_recipient_email('AtlantisTimes'), '') else: self.assertEqual(wju.get_journal_alert_recipient_email('AtlantisTimes'), 'recipients@atlantis.atl') def test_get_journal_template(self): """webjournal - returns the journal templates name for the given template type""" value = wju.get_journal_template('index', 'AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(value, 'webjournal/AtlantisTimes_Index.bft') def test_get_journal_name_intl(self): """webjournal - returns the nice name of the journal """ name = wju.get_journal_name_intl('AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(name, 'Atlantis Times') def test_get_journal_languages(self): """webjournal - returns the list of languages defined for this journal""" lang = wju.get_journal_languages('AtlantisTimes') self.assertEqual(lang[0], 'en') self.assertEqual(lang[1], 'fr') def test_get_journal_issue_grouping(self): """webjournal - returns the number of issue that are typically released at the same time""" issue = wju.get_journal_issue_grouping('AtlantisTimes') self.assertEqual(issue, 2) def test_get_journal_nb_issues_per_year(self): """webjournal - returns the default number of issues per year for this journal""" nb = wju.get_journal_nb_issues_per_year('AtlantisTimes') self.assertEqual(nb, 52) def test_get_journal_preferred_language(self): """webjournal - returns the most adequate language to display the journal, given a language """ value = wju.get_journal_preferred_language('AtlantisTimes', 'fr') self.assertEqual(value, 'fr') value = wju.get_journal_preferred_language('AtlantisTimes', 'it') self.assertEqual(value, 'en') value = wju.get_journal_preferred_language('AtlantisTimes', 'hello') self.assertEqual(value, 'en') def test_get_unreleased_issue_hiding_mode(self): """webjournal - returns how unreleased issue should be treated""" value = wju.get_unreleased_issue_hiding_mode('AtlantisTimes') self.assertEqual(value, 'all') def test_get_first_issue_from_config(self): """webjournal - returns the first issue as defined from config""" issue = wju.get_first_issue_from_config('AtlantisTimes') self.assertEqual(issue, '02/2009') class TimeIssueFunctions(unittest.TestCase): """Functions about time, using issues""" def test_get_current_issue(self): """webjournal - returns the current issue of a journal """ issue = wju.get_current_issue('en', 'AtlantisTimes') self.assertEqual(issue, '03/2009') def test_get_all_released_issues(self): """webjournal - returns the list of released issue""" issues = wju.get_all_released_issues('AtlantisTimes') self.assertEqual(issues[0], '03/2009') self.assertEqual(issues[1], '02/2009') def test_get_next_journal_issues(self): """webjournal - this function suggests the 'n' next issue numbers """ issues = wju.get_next_journal_issues('03/2009', 'AtlantisTimes', n=2) self.assertEqual(issues[0], '04/2009') self.assertEqual(issues[1], '05/2009') def test_get_grouped_issues(self): """webjournal - returns all the issues grouped with a given one""" issues = wju.get_grouped_issues('AtlantisTimes', '03/2009') self.assertEqual(issues[0], '02/2009') self.assertEqual(issues[1], '03/2009') def test_get_issue_number_display(self): """webjournal - returns the display string for a given issue number""" issue_nb = wju.get_issue_number_display('03/2009', 
'AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(issue_nb, '02-03/2009') def test_make_issue_number(self): """webjournal - creates a normalized issue number representation""" issue = wju.make_issue_number('AtlantisTimes', 03, 2009, for_url_p=False) self.assertEqual(issue, '03/2009') issue = wju.make_issue_number('AtlantisTimes', 06, 2009, for_url_p=False) self.assertEqual(issue, '06/2009') issue = wju.make_issue_number('AtlantisTimes', 03, 2008, for_url_p=False) self.assertEqual(issue, '03/2008') def test_get_release_datetime(self): """webjournal - gets the date at which an issue was released from the DB""" value = wju.get_release_datetime('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(value, datetime.datetime(2009, 1, 16, 0, 0)) def test_get_announcement_datetime(self): """webjournal - get the date at which an issue was announced through the alert system""" value = wju.get_announcement_datetime('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(value, None) def test_datetime_to_issue(self): """webjournal - returns the issue corresponding to the given datetime object""" date_value = datetime.datetime(2009, 7, 16, 13, 39, 46, 426373) value = wju.datetime_to_issue(date_value, 'AtlantisTimes') self.assertEqual(value, None) def test_issue_to_datetime(self): """webjournal - returns the *theoretical* date of release for given issue""" issue = wju.issue_to_datetime('03/2009', 'AtlantisTimes', granularity=None) self.assertEqual(issue, datetime.datetime(2009, 1, 19, 0, 0)) def test_get_number_of_articles_for_issue(self): """webjournal - returns a dictionary with all categories and number of articles in each category""" value = wju.get_number_of_articles_for_issue('03/2009', 'AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(value.values()[0], 3) self.assertEqual(value.values()[1], 2) self.assertEqual(value.keys()[0], 'News') self.assertEqual(value.keys()[1], 'Science') def test_is_recid_in_released_issue(self): """webjournal - check identification of records as part of a released issue""" for recid in xrange(1, 99): # Not articles self.assertEqual(wju.is_recid_in_released_issue(recid), False) for recid in xrange(99, 104): # Article published and well categorized/indexed self.assertEqual(wju.is_recid_in_released_issue(recid), True) # Even though article is not in public collection (yet?), it # is part of a released issue self.assertEqual(wju.is_recid_in_released_issue(111), True) # Article is not part of public collection, and is not part of # a released issue self.assertEqual(wju.is_recid_in_released_issue(112), False) def test_article_in_unreleased_issue(self): """webjournal - check access to unreleased article""" # Record is not public self.assertEqual(record_public_p(112), False) # Unreleased article is not visible to guest error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/06/News/112' , expected_text=["A naturalist's voyage around the world"], unexpected_text=['Galapagos Archipelago']) if error_messages: self.fail(merge_error_messages(error_messages)) # Unreleased article is visible to editor error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/06/News/112', username='balthasar', password='b123althasar', expected_text=['Galapagos Archipelago'], unexpected_text=['This file is restricted', 'You are not authorized']) if error_messages: self.fail(merge_error_messages(error_messages)) def test_restricted_article_in_released_issue(self): """webjournal - check access to restricted article in released 
issue""" # Record is not public self.assertEqual(record_public_p(112), False) # Released article (even if restricted) is visible to guest error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/Science/111' , expected_text=["Scissor-beak"], unexpected_text=["A naturalist's voyage around the world"]) if error_messages: self.fail(merge_error_messages(error_messages)) class JournalRelated(unittest.TestCase): """Functions about journal""" def test_get_journal_info_path(self): """webjournal - returns the path to the info file of the given journal""" info = wju.get_journal_info_path('AtlantisTimes') path = CFG_PREFIX + '/var/cache/webjournal/AtlantisTimes/info.dat' self.assertEqual(info, path) def test_get_journal_article_cache_path(self): """webjournal - returns the path to cache file of the articles of a given issue""" info = wju.get_journal_article_cache_path('AtlantisTimes', '03/2009') - path = CFG_PREFIX + '/var/cache/webjournal/AtlantisTimes/03_2009_articles_cache.dat' + path = CFG_PREFIX + '/var/cache/webjournal/AtlantisTimes/2009/03/articles_cache.dat' self.assertEqual(info, path) def test_get_journal_id(self): """webjournal - get the id for this journal from the DB""" jrnid = wju.get_journal_id('AtlantisTimes', ln=CFG_SITE_LANG) self.assertEqual(jrnid, 1) def test_guess_journal_name(self): """webjournal - tries to take a guess what a user was looking for on the server if not providing a name for the journal""" name = wju.guess_journal_name('en', journal_name=None) self.assertEqual(name, 'AtlantisTimes' ) def test_get_journals_ids_and_names(self): """webjournal - returns the list of existing journals IDs and names""" ids_names = wju.get_journals_ids_and_names() self.assertEqual(ids_names[0].values(), [1, 'AtlantisTimes']) self.assertEqual(ids_names[0].keys(), ['journal_id', 'journal_name']) def test_parse_url_string(self): """webjournal - parses any url string given in webjournal""" d = wju.parse_url_string("/journal/AtlantisTimes/2009/03/News/?ln=en") self.assertEqual(d['category'], 'News') self.assertEqual(d['issue_year'], 2009) self.assertEqual(d['ln'], 'en') self.assertEqual(d['issue_number'], 3) self.assertEqual(d['journal_name'], 'AtlantisTimes') self.assertEqual(d['issue'], '03/2009') d = wju.parse_url_string("/journal/AtlantisTimes/2009/03/Science?ln=en") self.assertEqual(d['category'], 'Science') self.assertEqual(d['issue_year'], 2009) self.assertEqual(d['ln'], 'en') self.assertEqual(d['issue_number'], 3) self.assertEqual(d['journal_name'], 'AtlantisTimes') self.assertEqual(d['issue'], '03/2009') d = wju.parse_url_string("/journal/AtlantisTimes/2009/03/News/97?ln=en") self.assertEqual(d['category'], 'News') self.assertEqual(d['issue_year'], 2009) self.assertEqual(d['ln'], 'en') self.assertEqual(d['issue_number'], 3) self.assertEqual(d['recid'], 97) self.assertEqual(d['journal_name'], 'AtlantisTimes') self.assertEqual(d['issue'], '03/2009') try: wju.parse_url_string("/journal/fictivejournal/2009/03/News/97?ln=en") dont_find_journal = 'not' except: dont_find_journal = 'ok' self.assertEqual(dont_find_journal, 'ok') class HtmlCachingFunction(unittest.TestCase): """HTML caching functions""" def setUp(self): "Access some URL for cache to be generated" urllib.urlopen(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News') urllib.urlopen(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News/103') def test_get_index_page_from_cache(self): """webjournal - function to get an index page from the cache""" value = wju.get_index_page_from_cache('AtlantisTimes', 
'News', '03/2009', 'en') assert("Atlantis (Timaeus)" in value) def test_get_article_page_from_cache(self): """webjournal - gets an article view of a journal from cache""" value = wju.get_article_page_from_cache('AtlantisTimes', 'News', 103, '03/2009', 'en') assert("April 14th, 1832.—Leaving Socêgo, we rode to another estate on the Rio Macâe" in value) def test_clear_cache_for_issue(self): """webjournal - clears the cache of a whole issue""" value = wju.clear_cache_for_issue('AtlantisTimes', '03/2009') self.assertEqual(value, True) class FormattingElements(unittest.TestCase): """Test how formatting elements behave in various contexts""" def test_language_handling_in_journal(self): """webjournal - check washing of ln parameter in /journal handler""" error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News/103?verbose=9&ln=hello' , expected_text=["we rode to another estate", "The forest abounded with beautiful objects"], unexpected_text=["Error when evaluating format element WEBJOURNAL_"]) if error_messages: self.fail(merge_error_messages(error_messages)) def test_language_handling_in_record(self): """webjournal - check washing of ln parameter in /record handler""" error_messages = test_web_page_content(CFG_SITE_URL + '/record/103?verbose=9&ln=hello' , expected_text=["we rode to another estate", "The forest abounded with beautiful objects"], unexpected_text=["Error when evaluating format element WEBJOURNAL_"]) if error_messages: self.fail(merge_error_messages(error_messages)) def test_language_handling_in_whatsnew_widget(self): """webjournal - check handling of ln parameter in "what's new" widget""" error_messages = test_web_page_content(CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/News?ln=fr' , expected_link_label="Scissor-beak", expected_link_target=CFG_SITE_URL + "/journal/AtlantisTimes/2009/03/Science/111?ln=fr") if error_messages: self.fail(merge_error_messages(error_messages)) TEST_SUITE = make_test_suite(ArticlesRelated, CategoriesRelated, JournalConfigVars, TimeIssueFunctions, JournalRelated, HtmlCachingFunction, FormattingElements) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/webjournal/lib/webjournal_utils.py b/modules/webjournal/lib/webjournal_utils.py index 1405597a7..9f969c3a7 100644 --- a/modules/webjournal/lib/webjournal_utils.py +++ b/modules/webjournal/lib/webjournal_utils.py @@ -1,1783 +1,1809 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Various utilities for WebJournal, e.g. config parser, etc. 
""" import time import datetime import calendar import re import os import cPickle import math import urllib from MySQLdb import OperationalError from xml.dom import minidom from urlparse import urlparse from invenio.config import \ CFG_ETCDIR, \ CFG_SITE_URL, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_DEVEL_SITE, \ CFG_CERN_SITE from invenio.dbquery import run_sql from invenio.bibformat_engine import BibFormatObject from invenio.search_engine import search_pattern, record_exists from invenio.messages import gettext_set_language from invenio.errorlib import register_exception from invenio.urlutils import make_invenio_opener WEBJOURNAL_OPENER = make_invenio_opener('WebJournal') ########################### REGULAR EXPRESSIONS ###################### header_pattern = re.compile('\s*(?P
.*?)\s*

') header_pattern2 = re.compile('(?P
.*?)

') para_pattern = re.compile('(?P.+?)

', re.DOTALL) img_pattern = re.compile('\S+?)("|\'|\s).*?/>', re.DOTALL) image_pattern = re.compile(r''' (\S*)["']?>)?# get the link location for the image \s*# after each tag we can have arbitrary whitespaces
# the image is always centered \s* \S*)\s*border=1\s*(/)?># getting the image itself \s*
\s* ()? (
|
|
)*# the caption can be separated by any nr of line breaks ( \s* \s*
(?P.*?)
# getting the caption \s*
\s*
)?''', re.DOTALL | re.VERBOSE | re.IGNORECASE ) #' ############################## FEATURED RECORDS ###################### def get_featured_records(journal_name): """ Returns the 'featured' records i.e. records chosen to be displayed with an image on the main page, in the widgets section, for the given journal. parameter: journal_name - (str) the name of the journal for which we want to get the featured records returns: list of tuples (recid, img_url) """ try: feature_file = open('%s/webjournal/%s/featured_record' % \ (CFG_ETCDIR, journal_name)) except: return [] records = feature_file.readlines() return [(record.split('---', 1)[0], record.split('---', 1)[1]) \ for record in records if "---" in record] def add_featured_record(journal_name, recid, img_url): """ Adds the given record to the list of featured records of the given journal. parameters: journal_name - (str) the name of the journal to which the record should be added. recid - (int) the record id of the record to be featured. img_url - (str) a url to an image icon displayed along the featured record. returns: 0 if everything went ok 1 if record is already in the list 2 if other problems """ # Check that record is not already there featured_records = get_featured_records(journal_name) for featured_recid, featured_img in featured_records: if featured_recid == str(recid): return 1 try: fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR, journal_name), "a") fptr.write(str(recid) + '---' + img_url + '\n') fptr.close() except: return 2 return 0 def remove_featured_record(journal_name, recid): """ Removes the given record from the list of featured records of the given journal. parameters: journal_name - (str) the name of the journal to which the record should be added. recid - (int) the record id of the record to be featured. """ featured_records = get_featured_records(journal_name) try: fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR, journal_name), "w") for featured_recid, featured_img in featured_records: if str(featured_recid) != str(recid): fptr.write(str(featured_recid) + '---' + featured_img + \ '\n') fptr.close() except: return 1 return 0 ############################ ARTICLES RELATED ######################## def get_order_dict_from_recid_list(recids, journal_name, issue_number, newest_first=False, newest_only=False): """ Returns the ordered list of input recids, for given 'issue_number'. Since there might be several articles at the same position, the returned structure is a dictionary with keys being order number indicated in record metadata, and values being list of recids for this order number (recids for one position are ordered from highest to lowest recid). Eg: {'1': [2390, 2386, 2385], '3': [2388], '2': [2389], '4': [2387]} Parameters: recids - a list of all recid's that should be brought into order journal_name - the name of the journal issue_number - *str* the issue_number for which we are deriving the order newest_first - *bool* if True, new articles should be placed at beginning of the list. 
If so, their position/order will be negative integers newest_only - *bool* if only new articles should be returned Returns: ordered_records: a dictionary with the recids ordered by keys """ ordered_records = {} ordered_new_records = {} records_without_defined_order = [] new_records_without_defined_order = [] for record in recids: temp_rec = BibFormatObject(record) articles_info = temp_rec.fields('773__') for article_info in articles_info: if article_info.get('n', '') == issue_number or \ '0' + article_info.get('n', '') == issue_number: if article_info.has_key('c') and \ article_info['c'].isdigit(): order_number = int(article_info.get('c', '')) if (newest_first or newest_only) and \ is_new_article(journal_name, issue_number, record): if ordered_new_records.has_key(order_number): ordered_new_records[order_number].append(record) else: ordered_new_records[order_number] = [record] elif not newest_only: if ordered_records.has_key(order_number): ordered_records[order_number].append(record) else: ordered_records[order_number] = [record] else: # No order? No problem! Append it at the end. if newest_first and is_new_article(journal_name, issue_number, record): new_records_without_defined_order.append(record) elif not newest_only: records_without_defined_order.append(record) # Append records without order at the end of the list if records_without_defined_order: if ordered_records: ordered_records[max(ordered_records.keys()) + 1] = records_without_defined_order else: ordered_records[1] = records_without_defined_order # Append new records without order at the end of the list of new # records if new_records_without_defined_order: if ordered_new_records: ordered_new_records[max(ordered_new_records.keys()) + 1] = new_records_without_defined_order else: ordered_new_records[1] = new_records_without_defined_order # Append new records at the beginning of the list of 'old' # records. To do so, use negative integers if ordered_new_records: highest_new_record_order = max(ordered_new_records.keys()) for order, new_records in ordered_new_records.iteritems(): ordered_records[- highest_new_record_order + order - 1] = new_records for (order, records) in ordered_records.iteritems(): # Reverse so that if there are several articles at same # positon, newest appear first records.reverse() return ordered_records def get_journal_articles(journal_name, issue, category, newest_first=False, newest_only=False): """ Returns the recids in given category and journal, for given issue number. The returned recids are grouped according to their 773__c field. Example of returned value: {'1': [2390, 2386, 2385], '3': [2388], '2': [2389], '4': [2387]} Parameters: journal_name - *str* the name of the journal (as used in URLs) issue - *str* the issue. Eg: "08/2007" category - *str* the name of the category newest_first - *bool* if True, new articles should be placed at beginning of the list. 
If so, their position/order will be negative integers newest_only - *bool* if only new articles should be returned """ use_cache = True current_issue = get_current_issue(CFG_SITE_LANG, journal_name) if issue_is_later_than(issue, current_issue): # If we are working on unreleased issue, do not use caching # mechanism use_cache = False if use_cache: cached_articles = _get_cached_journal_articles(journal_name, issue, category) if cached_articles is not None: ordered_articles = get_order_dict_from_recid_list(cached_articles, journal_name, issue, newest_first, newest_only) return ordered_articles # Retrieve the list of rules that map Category -> Search Pattern. # Keep only the rule matching our category config_strings = get_xml_from_config(["record/rule"], journal_name) category_to_search_pattern_rules = config_strings["record/rule"] try: matching_rule = [rule.split(',', 1) for rule in \ category_to_search_pattern_rules \ if rule.split(',')[0] == category] except: return [] recids_issue = search_pattern(p='773__n:%s -980:DELETED' % issue) recids_rule = search_pattern(p=matching_rule[0][1]) if issue[0] == '0': # search for 09/ and 9/ recids_issue.union_update(search_pattern(p='773__n:%s -980:DELETED' % issue.lstrip('0'))) recids_rule.intersection_update(recids_issue) recids = [recid for recid in recids_rule if record_exists(recid) == 1] if use_cache: _cache_journal_articles(journal_name, issue, category, recids) ordered_articles = get_order_dict_from_recid_list(recids, journal_name, issue, newest_first, newest_only) return ordered_articles def _cache_journal_articles(journal_name, issue, category, articles): """ Caches given articles IDs. """ journal_cache_path = get_journal_article_cache_path(journal_name, issue) try: journal_cache_file = open(journal_cache_path, 'r') journal_info = cPickle.load(journal_cache_file) journal_cache_file.close() except cPickle.PickleError, e: journal_info = {} except IOError: journal_info = {} except EOFError: journal_info = {} except ValueError: journal_info = {} if not journal_info.has_key('journal_articles'): journal_info['journal_articles'] = {} journal_info['journal_articles'][category] = articles # Create cache directory if it does not exist journal_cache_dir = os.path.dirname(journal_cache_path) if not os.path.exists(journal_cache_dir): try: os.makedirs(journal_cache_dir) except: return False journal_cache_file = open(journal_cache_path, 'w') cPickle.dump(journal_info, journal_cache_file) journal_cache_file.close() return True def _get_cached_journal_articles(journal_name, issue, category): """ Retrieve the articles IDs cached for this journal. Returns None if cache does not exist or more than 5 minutes old """ # Check if our cache is more or less up-to-date (not more than 5 # minutes old) try: journal_cache_path = get_journal_article_cache_path(journal_name, issue) last_update = os.path.getctime(journal_cache_path) except Exception, e : return None now = time.time() if (last_update + 5*60) < now: return None # Get from cache try: journal_cache_file = open(journal_cache_path, 'r') journal_info = cPickle.load(journal_cache_file) journal_articles = journal_info.get('journal_articles', {}).get(category, None) journal_cache_file.close() except cPickle.PickleError, e: journal_articles = None except IOError: journal_articles = None except EOFError: journal_articles = None except ValueError: journal_articles = None return journal_articles def is_new_article(journal_name, issue, recid): """ Check if given article should be considered as new or not. 
New articles are articles that have never appeared in older issues than given one. """ article_found_in_older_issue = False temp_rec = BibFormatObject(recid) publication_blocks = temp_rec.fields('773__') for publication_block in publication_blocks: this_issue_number, this_issue_year = issue.split('/') issue_number, issue_year = publication_block.get('n', '/').split('/', 1) if int(issue_year) < int(this_issue_year): # Found an older issue article_found_in_older_issue = True break elif int(issue_year) == int(this_issue_year) and \ int(issue_number) < int(this_issue_number): # Found an older issue article_found_in_older_issue = True break return not article_found_in_older_issue ############################ CATEGORIES RELATED ###################### def get_journal_categories(journal_name, issue=None): """ List the categories for the given journal and issue. Returns categories in same order as in config file. Parameters: journal_name - *str* the name of the journal (as used in URLs) issue - *str* the issue. Eg:'08/2007'. If None, consider all categories defined in journal config """ categories = [] current_issue = get_current_issue(CFG_SITE_LANG, journal_name) config_strings = get_xml_from_config(["record/rule"], journal_name) all_categories = [rule.split(',')[0] for rule in \ config_strings["record/rule"]] if issue is None: return all_categories for category in all_categories: recids = get_journal_articles(journal_name, issue, category) if len(recids.keys()) > 0: categories.append(category) return categories def get_category_query(journal_name, category): """ Returns the category definition for the given category and journal name Parameters: journal_name - *str* the name of the journal (as used in URLs) categoy - *str* a category name, as found in the XML config """ config_strings = get_xml_from_config(["record/rule"], journal_name) category_to_search_pattern_rules = config_strings["record/rule"] try: matching_rule = [rule.split(',', 1)[1].strip() for rule in \ category_to_search_pattern_rules \ if rule.split(',')[0] == category] except: return None return matching_rule[0] ######################### JOURNAL CONFIG VARS ###################### cached_parsed_xml_config = {} def get_xml_from_config(nodes, journal_name): """ Returns values from the journal configuration file. The needed values can be specified by node name, or by a hierarchy of nodes names using '/' as character to mean 'descendant of'. Eg. 'record/rule' to get all the values of 'rule' tags inside the 'record' node Returns a dictionary with a key for each query and a list of strings (innerXml) results for each key. Has a special field "config_fetching_error" that returns an error when something has gone wrong. """ # Get and open the config file results = {} if cached_parsed_xml_config.has_key(journal_name): config_file = cached_parsed_xml_config[journal_name] else: config_path = '%s/webjournal/%s/%s-config.xml' % \ (CFG_ETCDIR, journal_name, journal_name) config_file = minidom.Document try: config_file = minidom.parse("%s" % config_path) except: # todo: raise exception "error: no config file found" results["config_fetching_error"] = "could not find config file" return results else: cached_parsed_xml_config[journal_name] = config_file for node_path in nodes: node = config_file for node_path_component in node_path.split('/'): # pylint: disable=E1103 # The node variable can be rewritten in the loop and therefore # its type can change. 
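            # Illustrative note, not part of the original patch: assuming a
            # hypothetical journal config containing
            #   <record><rule>News, 980__a:NEWS</rule></record>
            # a call such as get_xml_from_config(["record/rule"], "MyJournal")
            # would first narrow down to the <record> NodeList, keep its first
            # item, then collect every <rule> child, returning roughly
            #   {'record/rule': ['News, 980__a:NEWS']}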
if node != config_file and node.length > 0: # We have a NodeList object: consider only first child node = node.item(0) # pylint: enable=E1103 try: node = node.getElementsByTagName(node_path_component) except: # WARNING, config did not have such value node = [] break results[node_path] = [] for result in node: try: result_string = result.firstChild.toxml(encoding="utf-8") except: # WARNING, config did not have such value continue results[node_path].append(result_string) return results def get_journal_issue_field(journal_name): """ Returns the MARC field in which this journal expects to find the issue number. Read this from the journal config file Parameters: journal_name - *str* the name of the journal (as used in URLs) """ config_strings = get_xml_from_config(["issue_number"], journal_name) try: issue_field = config_strings["issue_number"][0] except: issue_field = '773__n' return issue_field def get_journal_css_url(journal_name, type='screen'): """ Returns URL to this journal's CSS. Parameters: journal_name - *str* the name of the journal (as used in URLs) type - *str* 'screen' or 'print', depending on the kind of CSS """ config_strings = get_xml_from_config([type], journal_name) css_path = '' try: css_path = config_strings["screen"][0] except Exception: register_exception(req=None, suffix="No css file for journal %s. Is this right?" % \ journal_name) return CFG_SITE_URL + '/' + css_path def get_journal_submission_params(journal_name): """ Returns the (doctype, identifier element, identifier field) for the submission of articles in this journal, so that it is possible to build direct submission links. Parameter: journal_name - *str* the name of the journal (as used in URLs) """ doctype = '' identifier_field = '' identifier_element = '' config_strings = get_xml_from_config(["submission/doctype"], journal_name) if config_strings.get('submission/doctype', ''): doctype = config_strings['submission/doctype'][0] config_strings = get_xml_from_config(["submission/identifier_element"], journal_name) if config_strings.get('submission/identifier_element', ''): identifier_element = config_strings['submission/identifier_element'][0] config_strings = get_xml_from_config(["submission/identifier_field"], journal_name) if config_strings.get('submission/identifier_field', ''): identifier_field = config_strings['submission/identifier_field'][0] else: identifier_field = '037__a' return (doctype, identifier_element, identifier_field) def get_journal_draft_keyword_to_remove(journal_name): """ Returns the keyword that should be removed from the article metadata in order to move the article from Draft to Ready """ config_strings = get_xml_from_config(["draft_keyword"], journal_name) if config_strings.get('draft_keyword', ''): return config_strings['draft_keyword'][0] return '' def get_journal_alert_sender_email(journal_name): """ Returns the email address that should be used as send of the alert email. If not specified, use CFG_SITE_SUPPORT_EMAIL """ config_strings = get_xml_from_config(["alert_sender"], journal_name) if config_strings.get('alert_sender', ''): return config_strings['alert_sender'][0] return CFG_SITE_SUPPORT_EMAIL def get_journal_alert_recipient_email(journal_name): """ Returns the default email address of the recipients of the email Return a string of comma-separated emails. """ if CFG_DEVEL_SITE: # To be on the safe side, do not return the default alert recipients. 
return '' config_strings = get_xml_from_config(["alert_recipients"], journal_name) if config_strings.get('alert_recipients', ''): return config_strings['alert_recipients'][0] return '' def get_journal_collection_to_refresh_on_release(journal_name): """ Returns the list of collection to update (WebColl) upon release of an issue. """ from invenio.search_engine import collection_reclist_cache config_strings = get_xml_from_config(["update_on_release/collection"], journal_name) return [coll for coll in config_strings.get('update_on_release/collection', []) if \ collection_reclist_cache.cache.has_key(coll)] def get_journal_index_to_refresh_on_release(journal_name): """ Returns the list of indexed to update (BibIndex) upon release of an issue. """ from invenio.bibindex_engine import get_index_id_from_index_name config_strings = get_xml_from_config(["update_on_release/index"], journal_name) return [index for index in config_strings.get('update_on_release/index', []) if \ get_index_id_from_index_name(index) != ''] def get_journal_template(template, journal_name, ln=CFG_SITE_LANG): """ Returns the journal templates name for the given template type Raise an exception if template cannot be found. """ from invenio.webjournal_config import \ InvenioWebJournalTemplateNotFoundError config_strings = get_xml_from_config([template], journal_name) try: index_page_template = 'webjournal' + os.sep + \ config_strings[template][0] except: raise InvenioWebJournalTemplateNotFoundError(ln, journal_name, template) return index_page_template def get_journal_name_intl(journal_name, ln=CFG_SITE_LANG): """ Returns the nice name of the journal, translated if possible """ _ = gettext_set_language(ln) config_strings = get_xml_from_config(["niceName"], journal_name) if config_strings.get('niceName', ''): return _(config_strings['niceName'][0]) return '' def get_journal_languages(journal_name): """ Returns the list of languages defined for this journal """ config_strings = get_xml_from_config(["languages"], journal_name) if config_strings.get('languages', ''): return [ln.strip() for ln in \ config_strings['languages'][0].split(',')] return [] def get_journal_issue_grouping(journal_name): """ Returns the number of issue that are typically released at the same time. This is used if every two weeks you release an issue that should contains issue of next 2 weeks (eg. at week 16, you relase an issue named '16-17/2009') This number should help in the admin interface to guess how to release the next issue (can be overidden by user). """ config_strings = get_xml_from_config(["issue_grouping"], journal_name) if config_strings.get('issue_grouping', ''): issue_grouping = config_strings['issue_grouping'][0] if issue_grouping.isdigit() and int(issue_grouping) > 0: return int(issue_grouping) return 1 def get_journal_nb_issues_per_year(journal_name): """ Returns the default number of issues per year for this journal. This number should help in the admin interface to guess the next issue number (can be overidden by user). """ config_strings = get_xml_from_config(["issues_per_year"], journal_name) if config_strings.get('issues_per_year', ''): issues_per_year = config_strings['issues_per_year'][0] if issues_per_year.isdigit() and int(issues_per_year) > 0: return int(issues_per_year) return 52 def get_journal_preferred_language(journal_name, ln): """ Returns the most adequate language to display the journal, given a language. 
""" languages = get_journal_languages(journal_name) if ln in languages: return ln elif CFG_SITE_LANG in languages: return CFG_SITE_LANG elif languages: return languages else: return CFG_SITE_LANG def get_unreleased_issue_hiding_mode(journal_name): """ Returns how unreleased issue should be treated. Can be one of the following string values: 'future' - only future unreleased issues are hidden. Past unreleased one can be viewed 'all' - any unreleased issue (past and future) have to be hidden - 'none' - no unreleased issue is hidden """ config_strings = get_xml_from_config(["hide_unreleased_issues"], journal_name) if config_strings.get('hide_unreleased_issues', ''): hide_unreleased_issues = config_strings['hide_unreleased_issues'][0] if hide_unreleased_issues in ['future', 'all', 'none']: return hide_unreleased_issues return 'all' def get_first_issue_from_config(journal_name): """ Returns the first issue as defined from config. This should only be useful when no issue have been released. If not specified, returns the issue made of current week number and year. """ config_strings = get_xml_from_config(["first_issue"], journal_name) if config_strings.has_key('first_issue'): return config_strings['first_issue'][0] return time.strftime("%W/%Y", time.localtime()) ######################## TIME / ISSUE FUNCTIONS ###################### def get_current_issue(ln, journal_name): """ Returns the current issue of a journal as a string. Current issue is the latest released issue. """ journal_id = get_journal_id(journal_name, ln) try: current_issue = run_sql("""SELECT issue_number FROM jrnISSUE WHERE date_released <= NOW() AND id_jrnJOURNAL=%s ORDER BY date_released DESC LIMIT 1""", (journal_id,))[0][0] except: # start the first journal ever current_issue = get_first_issue_from_config(journal_name) run_sql("""INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display) VALUES(%s, %s, %s)""", (journal_id, current_issue, current_issue)) return current_issue def get_all_released_issues(journal_name): """ Returns the list of released issue, ordered by release date Note that it only includes the issues that are considered as released in the DB: it will not for example include articles that have been imported in the system but not been released """ journal_id = get_journal_id(journal_name) res = run_sql("""SELECT issue_number FROM jrnISSUE WHERE id_jrnJOURNAL = %s AND UNIX_TIMESTAMP(date_released) != 0 ORDER BY date_released DESC""", (journal_id,)) if res: return [row[0] for row in res] else: return [] def get_next_journal_issues(current_issue_number, journal_name, n=2): """ This function suggests the 'n' next issue numbers """ number, year = current_issue_number.split('/', 1) number = int(number) year = int(year) number_issues_per_year = get_journal_nb_issues_per_year(journal_name) next_issues = [make_issue_number(journal_name, ((number - 1 + i) % (number_issues_per_year)) + 1, year + ((number - 1 + i) / number_issues_per_year)) \ for i in range(1, n + 1)] return next_issues def get_grouped_issues(journal_name, issue_number): """ Returns all the issues grouped with a given one. Issues are sorted from the oldest to newest one. 
""" grouped_issues = [] journal_id = get_journal_id(journal_name, CFG_SITE_LANG) issue_display = get_issue_number_display(issue_number, journal_name) res = run_sql("""SELECT issue_number FROM jrnISSUE WHERE id_jrnJOURNAL=%s AND issue_display=%s""", (journal_id, issue_display)) if res: grouped_issues = [row[0] for row in res] grouped_issues.sort(compare_issues) return grouped_issues def compare_issues(issue1, issue2): """ Comparison function for issues. Returns: -1 if issue1 is older than issue2 0 if issues are equal 1 if issue1 is newer than issue2 """ issue1_number, issue1_year = issue1.split('/', 1) issue2_number, issue2_year = issue2.split('/', 1) if int(issue1_year) == int(issue2_year): return cmp(int(issue1_number), int(issue2_number)) else: return cmp(int(issue1_year), int(issue2_year)) def issue_is_later_than(issue1, issue2): """ Returns true if issue1 is later than issue2 """ issue_number1, issue_year1 = issue1.split('/', 1) issue_number2, issue_year2 = issue2.split('/', 1) if int(issue_year1) > int(issue_year2): return True elif int(issue_year1) == int(issue_year2): return int(issue_number1) > int(issue_number2) else: return False def get_issue_number_display(issue_number, journal_name, ln=CFG_SITE_LANG): """ Returns the display string for a given issue number. """ journal_id = get_journal_id(journal_name, ln) issue_display = run_sql("""SELECT issue_display FROM jrnISSUE WHERE issue_number=%s AND id_jrnJOURNAL=%s""", (issue_number, journal_id)) if issue_display: return issue_display[0][0] else: # Not yet released... return issue_number def make_issue_number(journal_name, number, year, for_url_p=False): """ Creates a normalized issue number representation with given issue number (as int or str) and year (as int or str). Reverse the year and number if for_url_p is True """ number_issues_per_year = get_journal_nb_issues_per_year(journal_name) precision = len(str(number_issues_per_year)) number = int(str(number)) year = int(str(year)) if for_url_p: return ("%i/%0" + str(precision) + "i") % \ (year, number) else: return ("%0" + str(precision) + "i/%i") % \ (number, year) def get_release_datetime(issue, journal_name, ln=CFG_SITE_LANG): """ Gets the date at which an issue was released from the DB. Returns None if issue has not yet been released. See issue_to_datetime() to get the *theoretical* release time of an issue. """ journal_id = get_journal_id(journal_name, ln) try: release_date = run_sql("""SELECT date_released FROM jrnISSUE WHERE issue_number=%s AND id_jrnJOURNAL=%s""", (issue, journal_id))[0][0] except: return None if release_date: return release_date else: return None def get_announcement_datetime(issue, journal_name, ln=CFG_SITE_LANG): """ Get the date at which an issue was announced through the alert system. Return None if not announced """ journal_id = get_journal_id(journal_name, ln) try: announce_date = run_sql("""SELECT date_announced FROM jrnISSUE WHERE issue_number=%s AND id_jrnJOURNAL=%s""", (issue, journal_id))[0][0] except: return None if announce_date: return announce_date else: return None def datetime_to_issue(issue_datetime, journal_name): """ Returns the issue corresponding to the given datetime object. If issue_datetime is too far in the future or in the past, gives the best possible matching issue, or None, if it does not seem to exist. #If issue_datetime is too far in the future, return the latest #released issue. 
#If issue_datetime is too far in the past, return None Parameters: issue_datetime - *datetime* date of the issue to be retrieved journal_name - *str* the name of the journal (as used in URLs) """ issue_number = None journal_id = get_journal_id(journal_name) # Try to discover how much days an issue is valid nb_issues_per_year = get_journal_nb_issues_per_year(journal_name) this_year_number_of_days = 365 if calendar.isleap(issue_datetime.year): this_year_number_of_days = 366 issue_day_lifetime = math.ceil(float(this_year_number_of_days)/nb_issues_per_year) res = run_sql("""SELECT issue_number, date_released FROM jrnISSUE WHERE date_released < %s AND id_jrnJOURNAL = %s ORDER BY date_released DESC LIMIT 1""", (issue_datetime, journal_id)) if res and res[0][1]: issue_number = res[0][0] issue_release_date = res[0][1] # Check that the result is not too far in the future: if issue_release_date + datetime.timedelta(issue_day_lifetime) < issue_datetime: # In principle, the latest issue will no longer be valid # at that time return None else: # Mmh, are we too far in the past? This can happen in the case # of articles that have been imported in the system but never # considered as 'released' in the database. So we should still # try to approximate/match an issue: if round(issue_day_lifetime) in [6, 7, 8]: # Weekly issues. We can use this information to better # match the issue number issue_nb = int(issue_datetime.strftime('%W')) # = week number else: # Compute the number of days since beginning of year, and # divide by the lifetime of an issue: we get the # approximate issue_number issue_nb = math.ceil((int(issue_datetime.strftime('%j')) / issue_day_lifetime)) issue_number = ("%0" + str(len(str(nb_issues_per_year)))+ "i/%i") % (issue_nb, issue_datetime.year) # Now check if this issue exists in the system for this # journal if not get_journal_categories(journal_name, issue_number): # This issue did not exist return None return issue_number DAILY = 1 WEEKLY = 2 MONTHLY = 3 def issue_to_datetime(issue_number, journal_name, granularity=None): """ Returns the *theoretical* date of release for given issue: useful if you release on Friday, but the issue date of the journal should correspond to the next Monday. This will correspond to the next day/week/month, depending on the number of issues per year (or the 'granularity' if specified) and the release time (if close to the end of a period defined by the granularity, consider next period since release is made a bit in advance). See get_release_datetime() for the *real* release time of an issue THIS FUNCTION SHOULD ONLY BE USED FOR INFORMATIVE DISPLAY PURPOSE, AS IT GIVES APPROXIMATIVE RESULTS. Do not use it to make decisions. Parameters: issue_number - *str* issue number to consider journal_name - *str* the name of the journal (as used in URLs) granularity - *int* the granularity to consider """ # If we have released, we can use this information. Otherwise we # have to approximate. 
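    # Illustrative example, not part of the original patch: for a hypothetical
    # weekly journal (52 issues/year) whose issue '02/2009' has no recorded
    # release date, and whose last release happened in a previous year, the
    # approximation below starts from 1 Jan 2009 and adds
    # round(1 * 365/52) = 7 days, giving 8 Jan 2009; the WEEKLY branch further
    # down then snaps that date back to the Monday of the same week.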
issue_date = get_release_datetime(issue_number, journal_name) if not issue_date: # Approximate release date number, year = issue_number.split('/') number = int(number) year = int(year) nb_issues_per_year = get_journal_nb_issues_per_year(journal_name) this_year_number_of_days = 365 if calendar.isleap(year): this_year_number_of_days = 366 issue_day_lifetime = float(this_year_number_of_days)/nb_issues_per_year # Compute from beginning of the year issue_date = datetime.datetime(year, 1, 1) + \ datetime.timedelta(days=int(round((number - 1) * issue_day_lifetime))) # Okay, but if last release is not too far in the past, better # compute from the release. current_issue = get_current_issue(CFG_SITE_LANG, journal_name) current_issue_time = get_release_datetime(current_issue, journal_name) if current_issue_time.year == issue_date.year: current_issue_number, current_issue_year = current_issue.split('/') current_issue_number = int(current_issue_number) # Compute from last release issue_date = current_issue_time + \ datetime.timedelta(days=int((number - current_issue_number) * issue_day_lifetime)) # If granularity is not specifed, deduce from config if granularity is None: nb_issues_per_year = get_journal_nb_issues_per_year(journal_name) if nb_issues_per_year > 250: granularity = DAILY elif nb_issues_per_year > 40: granularity = WEEKLY else: granularity = MONTHLY # Now we can adapt the date to match the granularity if granularity == DAILY: if issue_date.hour >= 15: # If released after 3pm, consider it is the issue of the next # day issue_date = issue_date + datetime.timedelta(days=1) elif granularity == WEEKLY: (year, week_nb, day_nb) = issue_date.isocalendar() if day_nb > 4: # If released on Fri, Sat or Sun, consider that it is next # week's issue. issue_date = issue_date + datetime.timedelta(weeks=1) # Get first day of the week issue_date = issue_date - datetime.timedelta(days=issue_date.weekday()) else: if issue_date.day > 22: # If released last week of the month, consider release for # next month issue_date = issue_date.replace(month=issue_date.month+1) date_string = issue_date.strftime("%Y %m 1") issue_date = datetime.datetime(*(time.strptime(date_string, "%Y %m %d")[0:6])) return issue_date def get_number_of_articles_for_issue(issue, journal_name, ln=CFG_SITE_LANG): """ Function that returns a dictionary with all categories and number of articles in each category. """ all_articles = {} categories = get_journal_categories(journal_name, issue) for category in categories: all_articles[category] = len(get_journal_articles(journal_name, issue, category)) return all_articles ########################## JOURNAL RELATED ########################### def get_journal_info_path(journal_name): """ Returns the path to the info file of the given journal. The info file should be used to get information about a journal when database is not available. 
Returns None if path cannot be determined """ # We must make sure we don't try to read outside of webjournal # cache dir info_path = os.path.abspath("%s/webjournal/%s/info.dat" % \ (CFG_CACHEDIR, journal_name)) if info_path.startswith(CFG_CACHEDIR + '/webjournal/'): return info_path else: return None def get_journal_article_cache_path(journal_name, issue): """ Returns the path to cache file of the articles of a given issue Returns None if path cannot be determined """ # We must make sure we don't try to read outside of webjournal # cache dir - cache_path = os.path.abspath("%s/webjournal/%s/%s_articles_cache.dat" % \ + issue_number, year = issue.replace('/', '_').split('_', 1) + cache_path = os.path.abspath("%s/webjournal/%s/%s/%s/articles_cache.dat" % \ (CFG_CACHEDIR, journal_name, - issue.replace('/', '_'))) + year, issue_number)) if cache_path.startswith(CFG_CACHEDIR + '/webjournal/'): return cache_path else: return None def get_journal_id(journal_name, ln=CFG_SITE_LANG): """ Get the id for this journal from the DB. If DB is down, try to get from cache. """ journal_id = None from invenio.webjournal_config import InvenioWebJournalJournalIdNotFoundDBError if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: # do not connect to the database as the site is closed for # maintenance: journal_info_path = get_journal_info_path(journal_name) try: journal_info_file = open(journal_info_path, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) except cPickle.PickleError, e: journal_id = None except IOError: journal_id = None except ValueError: journal_id = None else: try: res = run_sql("SELECT id FROM jrnJOURNAL WHERE name=%s", (journal_name,)) if len(res) > 0: journal_id = res[0][0] except OperationalError, e: # Cannot connect to database. Try to read from cache journal_info_path = get_journal_info_path(journal_name) try: journal_info_file = open(journal_info_path, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info['journal_id'] except cPickle.PickleError, e: journal_id = None except IOError: journal_id = None except ValueError: journal_id = None if journal_id is None: raise InvenioWebJournalJournalIdNotFoundDBError(ln, journal_name) return journal_id def guess_journal_name(ln, journal_name=None): """ Tries to take a guess what a user was looking for on the server if not providing a name for the journal, or if given journal name does not match case of original journal. """ from invenio.webjournal_config import InvenioWebJournalNoJournalOnServerError from invenio.webjournal_config import InvenioWebJournalNoNameError journals_id_and_names = get_journals_ids_and_names() if len(journals_id_and_names) == 0: raise InvenioWebJournalNoJournalOnServerError(ln) elif not journal_name and \ journals_id_and_names[0].has_key('journal_name'): return journals_id_and_names[0]['journal_name'] elif len(journals_id_and_names) > 0: possible_journal_names = [journal_id_and_name['journal_name'] for journal_id_and_name \ in journals_id_and_names \ if journal_id_and_name.get('journal_name', '').lower() == journal_name.lower()] if possible_journal_names: return possible_journal_names[0] else: raise InvenioWebJournalNoNameError(ln) else: raise InvenioWebJournalNoNameError(ln) def get_journals_ids_and_names(): """ Returns the list of existing journals IDs and names. Try to read from the DB, or from cache if DB is not accessible. 
""" journals = [] if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: # do not connect to the database as the site is closed for # maintenance: files = os.listdir("%s/webjournal" % CFG_CACHEDIR) info_files = [path + os.sep + 'info.dat' for path in files if \ os.path.isdir(path) and \ os.path.exists(path + os.sep + 'info.dat')] for info_file in info_files: try: journal_info_file = open(info_file, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) journal_name = journal_info.get('journal_name', None) current_issue = journal_info.get('current_issue', None) if journal_id is not None and \ journal_name is not None: journals.append({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': current_issue}) except cPickle.PickleError, e: # Well, can't do anything... continue except IOError: # Well, can't do anything... continue except ValueError: continue else: try: res = run_sql("SELECT id, name FROM jrnJOURNAL ORDER BY id") for journal_id, journal_name in res: journals.append({'journal_id': journal_id, 'journal_name': journal_name}) except OperationalError, e: # Cannot connect to database. Try to read from cache files = os.listdir("%s/webjournal" % CFG_CACHEDIR) info_files = [path + os.sep + 'info.dat' for path in files if \ os.path.isdir(path) and \ os.path.exists(path + os.sep + 'info.dat')] for info_file in info_files: try: journal_info_file = open(info_file, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) journal_name = journal_info.get('journal_name', None) current_issue = journal_info.get('current_issue', None) if journal_id is not None and \ journal_name is not None: journals.append({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': current_issue}) except cPickle.PickleError, e: # Well, can't do anything... continue except IOError: # Well, can't do anything... continue except ValueError: continue return journals def parse_url_string(uri): """ Centralized function to parse any url string given in webjournal. Useful to retrieve current category, journal, etc. from within format elements The webjournal interface handler should already have cleaned the URI beforehand, so that journal name exist, issue number is correct, etc. The only remaining problem might be due to the capitalization of journal name in contact, search and popup pages, so clean the journal name. Note that language is also as returned from the URL, which might need to be filtered to match available languages (WebJournal elements can rely in bfo.lang to retrieve washed language) returns: args: all arguments in dict form """ args = {'journal_name' : '', 'issue_year' : '', 'issue_number' : None, 'issue' : None, 'category' : '', 'recid' : -1, 'verbose' : 0, 'ln' : CFG_SITE_LANG, 'archive_year' : None, 'archive_search': ''} if not uri.startswith('/journal'): # Mmh, incorrect context. 
Still, keep language if available url_params = urlparse(uri)[4] args['ln'] = dict([part.split('=') for part in url_params.split('&') \ if len(part.split('=')) == 2]).get('ln', CFG_SITE_LANG) return args # Take everything after journal and before first question mark splitted_uri = uri.split('journal', 1) second_part = splitted_uri[1] splitted_uri = second_part.split('?') uri_middle_part = splitted_uri[0] uri_arguments = '' if len(splitted_uri) > 1: uri_arguments = splitted_uri[1] arg_list = uri_arguments.split("&") args['ln'] = CFG_SITE_LANG args['verbose'] = 0 for arg_pair in arg_list: arg_and_value = arg_pair.split('=') if len(arg_and_value) == 2: if arg_and_value[0] == 'ln': args['ln'] = arg_and_value[1] elif arg_and_value[0] == 'verbose' and \ arg_and_value[1].isdigit(): args['verbose'] = int(arg_and_value[1]) elif arg_and_value[0] == 'archive_year' and \ arg_and_value[1].isdigit(): args['archive_year'] = int(arg_and_value[1]) elif arg_and_value[0] == 'archive_search': args['archive_search'] = arg_and_value[1] elif arg_and_value[0] == 'name': args['journal_name'] = guess_journal_name(args['ln'], arg_and_value[1]) arg_list = uri_middle_part.split("/") if len(arg_list) > 1 and arg_list[1] not in ['search', 'contact', 'popup']: args['journal_name'] = urllib.unquote(arg_list[1]) elif arg_list[1] not in ['search', 'contact', 'popup']: args['journal_name'] = guess_journal_name(args['ln'], args['journal_name']) cur_issue = get_current_issue(args['ln'], args['journal_name']) if len(arg_list) > 2: try: args['issue_year'] = int(urllib.unquote(arg_list[2])) except: args['issue_year'] = int(cur_issue.split('/')[1]) else: args['issue'] = cur_issue args['issue_year'] = int(cur_issue.split('/')[1]) args['issue_number'] = int(cur_issue.split('/')[0]) if len(arg_list) > 3: try: args['issue_number'] = int(urllib.unquote(arg_list[3])) except: args['issue_number'] = int(cur_issue.split('/')[0]) args['issue'] = make_issue_number(args['journal_name'], args['issue_number'], args['issue_year']) if len(arg_list) > 4: args['category'] = urllib.unquote(arg_list[4]) if len(arg_list) > 5: try: args['recid'] = int(urllib.unquote(arg_list[5])) except: pass args['ln'] = get_journal_preferred_language(args['journal_name'], args['ln']) # FIXME : wash arguments? 
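    # Illustrative example, not part of the original patch: a URI such as
    # '/journal/AtlantisTimes/2009/12/News?ln=en' would typically be parsed
    # into {'journal_name': 'AtlantisTimes', 'issue_year': 2009,
    # 'issue_number': 12, 'issue': '12/2009', 'category': 'News',
    # 'recid': -1, 'verbose': 0, 'ln': 'en', ...} (the exact 'issue' string
    # and 'ln' value depend on the journal configuration).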
return args def make_journal_url(current_uri, custom_parameters=None): """ Create a URL, using the current URI and overriding values with the given custom_parameters Parameters: current_uri - *str* the current full URI custom_parameters - *dict* a dictionary of parameters that should override those of curent_uri """ if not custom_parameters: custom_parameters = {} default_params = parse_url_string(current_uri) for key, value in custom_parameters.iteritems(): # Override default params with custom params default_params[key] = str(value) uri = CFG_SITE_URL + '/journal/' if default_params['journal_name']: uri += urllib.quote(default_params['journal_name']) + '/' if default_params['issue_year'] and default_params['issue_number']: uri += make_issue_number(default_params['journal_name'], default_params['issue_number'], default_params['issue_year'], for_url_p=True) + '/' if default_params['category']: uri += urllib.quote(default_params['category']) if default_params['recid'] and \ default_params['recid'] != -1: uri += '/' + str(default_params['recid']) printed_question_mark = False if default_params['ln']: uri += '?ln=' + default_params['ln'] printed_question_mark = True if default_params['verbose'] != 0: if printed_question_mark: uri += '&verbose=' + str(default_params['verbose']) else: uri += '?verbose=' + str(default_params['verbose']) return uri ############################ HTML CACHING FUNCTIONS ############################ def cache_index_page(html, journal_name, category, issue, ln): """ Caches the index page main area of a Bulletin (right hand menu cannot be cached) + @return: tuple (path to cache file (or None), message) """ issue = issue.replace("/", "_") + issue_number, year = issue.split("_", 1) category = category.replace(" ", "") - cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \ + cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/index_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, - issue, category, + year, issue_number, category, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop caching - return False + return (None, 'Trying to cache at wrong location: %s' % cache_path) + + cache_path_dir = os.path.dirname(cache_path) + try: + if not os.path.isdir(cache_path_dir): + os.makedirs(cache_path_dir) + cached_file = open(cache_path, "w") + cached_file.write(html) + cached_file.close() + except Exception, e: + register_exception(req=None, + prefix="Could not store index page cache", + alert_admin=True) + return (None, e) - cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name) - if not os.path.isdir(cache_path_dir): - os.makedirs(cache_path_dir) - cached_file = open(cache_path, "w") - cached_file.write(html) - cached_file.close() + return (cache_path, '') def get_index_page_from_cache(journal_name, category, issue, ln): """ Function to get an index page from the cache. False if not in cache. """ issue = issue.replace("/", "_") + issue_number, year = issue.split("_", 1) category = category.replace(" ", "") - cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \ + cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/index_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, - issue, category, ln)) + year, issue_number, category, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. 
Stop reading cache return False try: cached_file = open(cache_path).read() except: return False return cached_file def cache_article_page(html, journal_name, category, recid, issue, ln): """ Caches an article view of a journal. + + If cache cannot be written, a warning is reported to the admin. + @return: tuple (path to cache file (or None), message) """ issue = issue.replace("/", "_") + issue_number, year = issue.split("_", 1) category = category.replace(" ", "") - cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \ + cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/article_%s_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, - issue, category, recid, ln)) + year, issue_number, category, recid, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop caching - return - cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name) - if not os.path.isdir(cache_path_dir): - os.makedirs(cache_path_dir) - cached_file = open(cache_path, "w") - cached_file.write(html) - cached_file.close() + return (None, 'Trying to cache at wrong location: %s' % cache_path) + + cache_path_dir = os.path.dirname(cache_path) + try: + if not os.path.isdir(cache_path_dir): + os.makedirs(cache_path_dir) + cached_file = open(cache_path, "w") + cached_file.write(html) + cached_file.close() + except Exception, e: + register_exception(req=None, + prefix="Could not store article cache", + alert_admin=True) + return (None, e) + + return (cache_path_dir, '') NOT_FOR_ALERT_COMMENTS_RE = re.compile('.*?', re.IGNORECASE | re.DOTALL) def get_article_page_from_cache(journal_name, category, recid, issue, ln, bfo=None): """ Gets an article view of a journal from cache. False if not in cache. """ issue = issue.replace("/", "_") + issue_number, year = issue.split("_", 1) category = category.replace(" ", "") - cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \ + cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/article_%s_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, - issue, category, recid, ln)) + year, issue_number, category, recid, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop reading cache return False try: cached_file = open(cache_path).read() except: return False if CFG_CERN_SITE and bfo: try: from invenio.bibformat_elements import bfe_webjournal_cern_toolbar cached_file = NOT_FOR_ALERT_COMMENTS_RE.sub(bfe_webjournal_cern_toolbar.format_element(bfo), cached_file, 1) except ImportError, e: pass return cached_file def clear_cache_for_article(journal_name, category, recid, issue): """ Resets the cache for an article (e.g. after an article has been modified) """ issue = issue.replace("/", "_") + issue_number, year = issue.split("_", 1) category = category.replace(" ", "") cache_path = os.path.abspath('%s/webjournal/%s/' % (CFG_CACHEDIR, journal_name)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. 
Stop deleting cache return False # try to delete the article cached file try: - os.remove('%s/webjournal/%s/%s_article_%s_%s_en.html' % - (CFG_CACHEDIR, journal_name, issue, category, recid)) + os.remove('%s/webjournal/%s/%s/%s/article_%s_%s_en.html' % + (CFG_CACHEDIR, journal_name, year, issue_number, category, recid)) except: pass try: - os.remove('%s/webjournal/%s/%s_article_%s_%s_fr.html' % - (CFG_CACHEDIR, journal_name, issue, category, recid)) + os.remove('%s/webjournal/%s/%s/%s/article_%s_%s_fr.html' % + (CFG_CACHEDIR, journal_name, year, issue_number, category, recid)) except: pass # delete the index page for the category try: - os.remove('%s/webjournal/%s/%s_index_%s_en.html' - % (CFG_CACHEDIR, journal_name, issue, category)) + os.remove('%s/webjournal/%s/%s/%s/index_%s_en.html' + % (CFG_CACHEDIR, journal_name, year, issue_number, category)) except: pass try: - os.remove('%s/webjournal/%s/%s_index_%s_fr.html' - % (CFG_CACHEDIR, journal_name, issue, category)) + os.remove('%s/webjournal/%s/%s/%s/index_%s_fr.html' + % (CFG_CACHEDIR, journal_name, year, issue_number, category)) except: pass try: path = get_journal_article_cache_path(journal_name, issue) os.remove(path) except: pass return True def clear_cache_for_issue(journal_name, issue): """ clears the cache of a whole issue. """ issue = issue.replace("/", "_") - cache_path_dir = os.path.abspath('%s/webjournal/%s' % \ - (CFG_CACHEDIR, journal_name)) + issue_number, year = issue.split("_", 1) + + cache_path_dir = os.path.abspath('%s/webjournal/%s/%s/%s/' % \ + (CFG_CACHEDIR, journal_name, + year, issue_number)) if not cache_path_dir.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop deleting cache return False all_cached_files = os.listdir(cache_path_dir) - non_deleted = [] for cached_file in all_cached_files: - if cached_file.startswith(issue.replace('/', '_')): - try: - os.remove(cache_path_dir + '/' + cached_file) - except: - return False - else: - non_deleted.append(cached_file) + try: + os.remove(cache_path_dir + '/' + cached_file) + except: + return False return True ######################### CERN SPECIFIC FUNCTIONS ################# def get_recid_from_legacy_number(issue_number, category, number): """ Returns the recid based on the issue number, category and 'number'. This is used to support URLs using the now deprecated 'number' argument. The function tries to reproduce the behaviour of the old way of doing, even keeping some of its 'problems' (so that we reach the same article as before with a given number).. Returns the recid as int, or -1 if not found """ recids = [] if issue_number[0] == "0": alternative_issue_number = issue_number[1:] recids = list(search_pattern(p='65017a:"%s" and 773__n:%s' % (category, issue_number))) recids.extend(list(search_pattern(p='65017a:"%s" and 773__n:%s' % (category, alternative_issue_number)))) else: recids = list(search_pattern(p='65017:"%s" and 773__n:%s' % (category, issue_number))) # Now must order the records and pick the one at index 'number'. # But we have to take into account that there can be multiple # records at position 1, and that these additional records should # be numbered with negative numbers: # 1, 1, 1, 2, 3 -> 1, -1, -2, 2, 3... 
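    # Illustrative note, not part of the original patch: if three records all
    # declare order 1 for the issue, only the first one encountered keeps the
    # positive key 1; the two extra ones are parked in negative_index_records
    # under keys 0 and -1, so that legacy 'number' values <= 0 can still
    # address them further down.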
negative_index_records = {} positive_index_records = {} # Fill in 'negative_index_records' and 'positive_index_records' # lists with the following loop for recid in recids: bfo = BibFormatObject(recid) order = [subfield['c'] for subfield in bfo.fields('773__') if \ issue_number in subfield.get('n', '')] if len(order) > 0: # If several orders are defined for the same article and # the same issue, keep the first one order = order[0] if order.isdigit(): # Order must be an int. Otherwise skip order = int(order) if order == 1 and positive_index_records.has_key(1): # This is then a negative number for this record index = (len(negative_index_records.keys()) > 0 and \ min(negative_index_records.keys()) -1) or 0 negative_index_records[index] = recid else: # Positive number for this record if not positive_index_records.has_key(order): positive_index_records[order] = recid else: # We make the assumption that we cannot have # twice the same position for two # articles. Previous WebJournal module was not # clear about that. Just drop this record # (better than crashing or looping forever..) pass recid_to_return = -1 # Ok, we can finally pick the recid corresponding to 'number' if number <= 0: negative_indexes = negative_index_records.keys() negative_indexes.sort() negative_indexes.reverse() if len(negative_indexes) > abs(number): recid_to_return = negative_index_records[negative_indexes[abs(number)]] else: if positive_index_records.has_key(number): recid_to_return = positive_index_records[number] return recid_to_return def is_recid_in_released_issue(recid): """ Returns True if recid is part of the latest issue of the given journal. WARNING: the function does not check that the article does not belong to the draft collection of the record. This is wanted, in order to workaround the time needed for a record to go from the draft collection to the final collection """ bfo = BibFormatObject(recid) journal_name = '' journal_names = [journal_name for journal_name in bfo.fields('773__t') if journal_name] if journal_names: journal_name = journal_names[0] else: return False existing_journal_names = [o['journal_name'] for o in get_journals_ids_and_names()] if not journal_name in existing_journal_names: # Try to remove whitespace journal_name = journal_name.replace(' ', '') if not journal_name in existing_journal_names: # Journal name unknown from WebJournal return False config_strings = get_xml_from_config(["draft_image_access_policy"], journal_name) if config_strings['draft_image_access_policy'] and \ config_strings['draft_image_access_policy'][0] != 'allow': # The journal does not want to optimize access to images return False article_issues = bfo.fields('773__n') current_issue = get_current_issue(CFG_SITE_LANG, journal_name) for article_issue in article_issues: # Check each issue until a released one is found if get_release_datetime(article_issue, journal_name): # Release date exists, issue has been released return True else: # Unreleased issue. Do we still allow based on journal config? 
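            # Illustrative note, not part of the original patch: with
            # hide_unreleased_issues set to 'future', an unreleased issue that
            # is older than the current one (say '10/2009' while the current
            # issue is '12/2009') is still treated as visible, whereas 'all'
            # keeps every unreleased issue hidden and 'none' hides nothing.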
unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name) if (unreleased_issues_mode == 'none' or \ (unreleased_issues_mode == 'future' and \ not issue_is_later_than(article_issue, current_issue))): return True return False diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py index 5ce9efad5..0551bc0be 100644 --- a/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py +++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_whatsNew.py @@ -1,289 +1,299 @@ # -*- coding: utf-8 -*- ## $Id: bfe_webjournal_widget_whatsNew.py,v 1.24 2009/01/27 07:25:12 jerome Exp $ ## ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ WebJournal widget - Display the index of the lastest articles, including 'breaking news'. """ import time import os from invenio.search_engine import search_pattern, record_exists from invenio.bibformat_engine import BibFormatObject from invenio.config import \ CFG_SITE_URL, \ CFG_CACHEDIR, \ CFG_ACCESS_CONTROL_LEVEL_SITE from invenio.webjournal_utils import \ parse_url_string, \ make_journal_url, \ get_journal_info_path, \ get_journal_categories, \ get_journal_articles, \ get_current_issue from invenio.messages import gettext_set_language +from invenio.errorlib import register_exception def format_element(bfo, latest_issue_only='yes', newest_articles_only='yes', link_category_headers='yes', display_categories='', hide_when_only_new_records="no"): """ Display the index to the newest articles (of the latest issue, or of the displayed issue) @param latest_issue_only: if 'yes', always display articles of the latest issue, even if viewing a past issue @param newest_articles_only: only display new articles, not those that also appeared in previous issues @param link_category_headers: if yes, category headers link to index page of that category @param display_categories: comma-separated list of categories to display. If none, display all @param hide_when_only_new_records: if 'yes' display new articles only if old articles exist in this issue """ args = parse_url_string(bfo.user_info['uri']) journal_name = args["journal_name"] ln = bfo.lang _ = gettext_set_language(ln) if latest_issue_only.lower() == 'yes': issue_number = get_current_issue(bfo.lang, journal_name) else: issue_number = args["issue"] # Try to get HTML from cache if args['verbose'] == 0: cached_html = _get_whatsNew_from_cache(journal_name, issue_number, ln) if cached_html: return cached_html # No cache? Build from scratch # 1. 
Get the articles journal_categories = get_journal_categories(journal_name, issue_number) if display_categories: display_categories = display_categories.lower().split(',') journal_categories = [category for category in journal_categories \ if category.lower() in display_categories] whats_new_articles = {} for category in journal_categories: whats_new_articles[category] = get_journal_articles(journal_name, issue_number, category, newest_only=newest_articles_only.lower() == 'yes') # Do we want to display new articles only if they have been added # to an issue that contains non-new records? if hide_when_only_new_records.lower() == "yes": # First gather all articles in this issue all_whats_new_articles = {} for category in journal_categories: all_whats_new_articles[category] = get_journal_articles(journal_name, issue_number, category, newest_first=True, newest_only=False) # Then check if we have some articles at position > -1 has_old_articles = False for articles in all_whats_new_articles.values(): if len([order for order in articles.keys() if order > -1]) > 0: has_old_articles = True break if not has_old_articles: # We don't have old articles? Thend don't consider any for category in journal_categories: whats_new_articles[category] = {} # 2. Build the HTML html_out = _get_breaking_news(ln, journal_name) for category in journal_categories: articles_in_category = whats_new_articles[category] html_articles_in_category = "" # Generate the list of articles in this category order_numbers = articles_in_category.keys() order_numbers.sort() for order in order_numbers: articles = articles_in_category[order] for recid in articles: link = make_journal_url(bfo.user_info['uri'], {'journal_name': journal_name, 'issue_number': issue_number.split('/')[0], 'issue_year': issue_number.split('/')[1], 'category': category, 'recid': recid, 'ln': bfo.lang}) temp_rec = BibFormatObject(recid) if ln == 'fr': try: title = temp_rec.fields('246_1a')[0] except: try: title = temp_rec.field('245__a') except: continue else: try: title = temp_rec.field('245__a') except: continue try: html_articles_in_category += '
  • %s
  • ' % \ (link, title) except: pass if html_articles_in_category: # Good, we found some new articles for this category. # Then insert the generated results into a larger list # with category as "parent". html_out += '
  • ' if link_category_headers.lower() == 'yes': html_out += '%s' % _(category) else: html_out += '%s' % _(category) html_out += '
      ' html_out += html_articles_in_category html_out += '
  • ' if not html_out: html_out = '' + _('There are no new articles for the moment') + '' else: html_out = '
      ' + html_out + '
    ' if args['verbose'] == 0: cache_whatsNew(html_out, journal_name, issue_number, ln) return html_out def _get_breaking_news(lang, journal_name): """ Gets the 'Breaking News' articles that are currently active according to start and end dates. """ # CERN Bulletin only if not journal_name.lower() == 'cernbulletin': return '' # Look for active breaking news breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \ if record_exists(recid) == 1] today = time.mktime(time.localtime()) breaking_news = "" for recid in breaking_news_recids: temp_rec = BibFormatObject(recid) try: end_date = time.mktime(time.strptime(temp_rec.field("925__b"), "%m/%d/%Y")) except: end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y")) if end_date < today: continue try: start_date = time.mktime(time.strptime(temp_rec.field("925__a"), "%m/%d/%Y")) except: start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y")) if start_date > today: continue publish_date = temp_rec.field("269__c") if lang == 'fr': title = temp_rec.field("246_1a") else: title = temp_rec.field("245__a") breaking_news += '''

    %s
    %s

    ''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title)

    if breaking_news:
        breaking_news = '<li>%s</li>' % breaking_news

    return breaking_news
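
# Illustrative sketch (not part of the original diff): the date handling above
# keeps a 'Breaking News' record only while "today" falls inside the window
# given by MARC 925__a (start) and 925__b (end), both in MM/DD/YYYY format.
# Missing or unparsable dates default so that the record is hidden (start far
# in the future, end far in the past). A standalone version of that rule:
import time

def _breaking_news_is_active(start_str, end_str, now=None):
    def parse(value, default):
        try:
            return time.mktime(time.strptime(value, "%m/%d/%Y"))
        except (TypeError, ValueError):
            return time.mktime(time.strptime(default, "%m/%d/%Y"))
    if now is None:
        now = time.mktime(time.localtime())
    start = parse(start_str, "01/01/2050")  # unknown start => never active yet
    end = parse(end_str, "01/01/1970")      # unknown end   => already expired
    return start <= now <= end

# Example: active only between the two dates.
_mid_2024 = time.mktime(time.strptime("06/15/2024", "%m/%d/%Y"))
assert _breaking_news_is_active("01/01/2020", "01/01/2049", now=_mid_2024)
assert not _breaking_news_is_active(None, "01/01/2049", now=_mid_2024)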

def _get_whatsNew_from_cache(journal_name, issue, ln):
    """
    Try to get the "whats new" box from cache.
    """
-    cache_path = os.path.abspath('%s/webjournal/%s/%s_whatsNew_%s.html' % \
+    issue = issue.replace("/", "_")
+    issue_number, year = issue.split("_", 1)
+    cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' % \
                                 (CFG_CACHEDIR,
                                  journal_name,
-                                 issue.replace('/','_'),
+                                 year, issue_number,
                                  ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Make sure we are reading from correct directory (you
        # know, in case there are '../../' inside journal name..)
        return False
    try:
        last_update = os.path.getctime(cache_path)
    except:
        return False
    try:
        # Get last journal update, based on journal info file last
        # modification time
        journal_info_path = get_journal_info_path(journal_name)
        last_journal_update = os.path.getctime(journal_info_path)
    except:
        return False
    now = time.time()
    if ((last_update + 30*60) < now) or \
       (last_journal_update > last_update):
        # invalidate after 30 minutes or if last journal release is
        # newer than cache
        return False
    try:
        cached_file = open(cache_path).read()
    except:
        return False
    return cached_file
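
# Illustrative sketch (not part of the original diff): the change above moves
# the "what's new" cache from one flat file per issue to a per-year/per-issue
# directory layout, while keeping the os.path.abspath() guard against journal
# names that try to escape the cache tree. The helper below is a hypothetical
# standalone rendition of that path logic, with `cache_root` standing in for
# CFG_CACHEDIR.
import os

def _whats_new_cache_path(cache_root, journal_name, issue, ln):
    """Return the cache path for an issue such as '02/2013', or None if unsafe."""
    issue = issue.replace("/", "_")
    issue_number, year = issue.split("_", 1)
    path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' %
                           (cache_root, journal_name, year, issue_number, ln))
    if not path.startswith(os.path.join(cache_root, 'webjournal')):
        # A journal name containing '../..' would resolve outside the cache.
        return None
    return path

# Example: '02/2013' ends up under <cache_root>/webjournal/<journal>/2013/02/.
print(_whats_new_cache_path('/tmp/cache', 'AtlantisTimes', '02/2013', 'en'))
print(_whats_new_cache_path('/tmp/cache', '../../etc', '02/2013', 'en'))  # None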

def cache_whatsNew(html, journal_name, issue, ln):
    """
    caches the whats new box for 30 minutes.
    """
    if not CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
-        cache_path = os.path.abspath('%s/webjournal/%s/%s_whatsNew_%s.html' % \
+        issue = issue.replace("/", "_")
+        issue_number, year = issue.split("_", 1)
+        cache_path = os.path.abspath('%s/webjournal/%s/%s/%s/whatsNew_%s.html' % \
                                     (CFG_CACHEDIR,
                                      journal_name,
-                                     issue.replace('/','_'),
+                                     year, issue_number,
                                      ln))
        if cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
            # Do not try to cache if the journal name led us to some
            # other directory ('../../' inside journal name for
            # example)
-            cache_dir = CFG_CACHEDIR + '/webjournal/' + journal_name
-            if not os.path.isdir(cache_dir):
-                os.makedirs(cache_dir)
-            cache_file = file(cache_path, "w")
-            cache_file.write(html)
-            cache_file.close()
+            try:
+                cache_dir = os.path.dirname(cache_path)
+                if not os.path.isdir(cache_dir):
+                    os.makedirs(cache_dir)
+                cache_file = file(cache_path, "w")
+                cache_file.write(html)
+                cache_file.close()
+            except Exception:
+                register_exception(req=None,
+                                   prefix="Could not store 'Whats new' section",
+                                   alert_admin=True)

def escape_values(bfo):
    """
    Called by BibFormat in order to check if output of this element
    should be escaped.
    """
    return 0

_ = gettext_set_language('en')
dummy = _("What's new")

diff --git a/modules/websession/lib/inveniogc.py b/modules/websession/lib/inveniogc.py
index c6d8ac678..b46d39fa2 100644
--- a/modules/websession/lib/inveniogc.py
+++ b/modules/websession/lib/inveniogc.py
@@ -1,631 +1,633 @@
## -*- mode: python; coding: utf-8; -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio garbage collector. """ __revision__ = "$Id$" import sys import datetime import time import os try: from invenio.dbquery import run_sql, wash_table_column_name from invenio.config import CFG_LOGDIR, CFG_TMPDIR, CFG_CACHEDIR, \ CFG_TMPSHAREDDIR, CFG_WEBSEARCH_RSS_TTL, CFG_PREFIX, \ CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS from invenio.bibtask import task_init, task_set_option, task_get_option, \ write_message, write_messages from invenio.access_control_mailcookie import mail_cookie_gc from invenio.bibdocfile import BibDoc from invenio.bibsched import gc_tasks from invenio.websubmit_config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX from invenio.dateutils import convert_datestruct_to_datetext except ImportError, e: print "Error: %s" % (e,) sys.exit(1) # configure variables CFG_MYSQL_ARGUMENTLIST_SIZE = 100 # After how many days to remove obsolete log/err files CFG_MAX_ATIME_RM_LOG = 28 # After how many days to zip obsolete log/err files CFG_MAX_ATIME_ZIP_LOG = 7 # After how many days to remove obsolete bibreformat fmt xml files CFG_MAX_ATIME_RM_FMT = 28 # After how many days to zip obsolete bibreformat fmt xml files CFG_MAX_ATIME_ZIP_FMT = 7 # After how many days to remove obsolete oaiharvest fmt xml files CFG_MAX_ATIME_RM_OAI = 14 # After how many days to zip obsolete oaiharvest fmt xml files CFG_MAX_ATIME_ZIP_OAI = 3 # After how many days to remove deleted bibdocs CFG_DELETED_BIBDOC_MAXLIFE = 365 * 10 # After how many day to remove old cached webjournal files CFG_WEBJOURNAL_TTL = 7 # After how many days to zip obsolete bibsword xml log files CFG_MAX_ATIME_ZIP_BIBSWORD = 7 # After how many days to remove obsolete bibsword xml log files CFG_MAX_ATIME_RM_BIBSWORD = 28 # After how many days to remove temporary video uploads CFG_MAX_ATIME_WEBSUBMIT_TMP_VIDEO = 3 # After how many days to remove obsolete refextract xml output files CFG_MAX_ATIME_RM_REFEXTRACT = 28 # After how many days to remove obsolete bibdocfiles temporary files CFG_MAX_ATIME_RM_BIBDOC = 4 # After how many days to remove obsolete WebSubmit-created temporary # icon files CFG_MAX_ATIME_RM_ICON = 7 # After how many days to remove obsolete WebSubmit-created temporary # stamp files CFG_MAX_ATIME_RM_STAMP = 7 # After how many days to remove obsolete WebJournal-created update XML CFG_MAX_ATIME_RM_WEBJOURNAL_XML = 7 # After how many days to remove obsolete temporary files attached with # the CKEditor in WebSubmit context? CFG_MAX_ATIME_RM_WEBSUBMIT_CKEDITOR_FILE = 28 # After how many days to remove obsolete temporary files related to BibEdit # cache CFG_MAX_ATIME_BIBEDIT_TMP = 3 def gc_exec_command(command): """ Exec the command logging in appropriate way its output.""" write_message(' %s' % command, verbose=9) (dummy, output, errors) = os.popen3(command) write_messages(errors.read()) write_messages(output.read()) def clean_logs(): """ Clean the logs from obsolete files. 
""" write_message("""CLEANING OF LOG FILES STARTED""") write_message("- deleting/gzipping bibsched empty/old err/log " "BibSched files") vstr = task_get_option('verbose') > 1 and '-v' or '' gc_exec_command('find %s -name "bibsched_task_*"' ' -size 0c -exec rm %s -f {} \;' \ % (CFG_LOGDIR, vstr)) gc_exec_command('find %s -name "bibsched_task_*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_LOGDIR, CFG_MAX_ATIME_RM_LOG, vstr)) gc_exec_command('find %s -name "bibsched_task_*"' ' -atime +%s -exec gzip %s -9 {} \;' \ % (CFG_LOGDIR, CFG_MAX_ATIME_ZIP_LOG, vstr)) write_message("""CLEANING OF LOG FILES FINISHED""") def clean_tempfiles(): """ Clean old temporary files. """ write_message("""CLEANING OF TMP FILES STARTED""") write_message("- deleting/gzipping temporary empty/old " "BibReformat xml files") vstr = task_get_option('verbose') > 1 and '-v' or '' gc_exec_command('find %s %s -name "rec_fmt_*"' ' -size 0c -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, vstr)) gc_exec_command('find %s %s -name "rec_fmt_*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_FMT, vstr)) gc_exec_command('find %s %s -name "rec_fmt_*"' ' -atime +%s -exec gzip %s -9 {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_ZIP_FMT, vstr)) write_message("- deleting/gzipping temporary old " "OAIHarvest xml files") gc_exec_command('find %s %s -name "oaiharvestadmin.*"' ' -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, vstr)) gc_exec_command('find %s %s -name "bibconvertrun.*"' ' -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, vstr)) # Using mtime and -r here to include directories. gc_exec_command('find %s %s -name "oaiharvest*"' ' -mtime +%s -exec gzip %s -9 {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_ZIP_OAI, vstr)) gc_exec_command('find %s %s -name "oaiharvest*"' ' -mtime +%s -exec rm %s -rf {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_OAI, vstr)) gc_exec_command('find %s %s -name "oai_archive*"' ' -mtime +%s -exec rm %s -rf {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_OAI, vstr)) write_message("- deleting/gzipping temporary old " "BibSword files") gc_exec_command('find %s %s -name "bibsword_*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_BIBSWORD, vstr)) gc_exec_command('find %s %s -name "bibsword_*"' ' -atime +%s -exec gzip %s -9 {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_ZIP_BIBSWORD, vstr)) # DELETE ALL FILES CREATED DURING VIDEO SUBMISSION write_message("- deleting old video submissions") gc_exec_command('find %s -name %s* -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPSHAREDDIR, CFG_WEBSUBMIT_TMP_VIDEO_PREFIX, CFG_MAX_ATIME_WEBSUBMIT_TMP_VIDEO, vstr)) write_message("- deleting temporary old " "RefExtract files") gc_exec_command('find %s %s -name "refextract*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_MAX_ATIME_RM_REFEXTRACT, vstr)) write_message("- deleting temporary old bibdocfiles") gc_exec_command('find %s %s -name "bibdocfile_*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_BIBDOC, vstr)) write_message("- deleting old temporary WebSubmit icons") gc_exec_command('find %s %s -name "websubmit_icon_creator_*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_ICON, vstr)) write_message("- deleting old temporary WebSubmit stamps") gc_exec_command('find %s %s -name "websubmit_file_stamper_*"' ' -atime +%s -exec rm %s -f {} \;' \ % 
(CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_STAMP, vstr)) write_message("- deleting old temporary WebJournal XML files") gc_exec_command('find %s %s -name "webjournal_publish_*"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPDIR, CFG_TMPSHAREDDIR, \ CFG_MAX_ATIME_RM_WEBJOURNAL_XML, vstr)) write_message("- deleting old temporary files attached with CKEditor") gc_exec_command('find %s/var/tmp/attachfile/ ' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_PREFIX, CFG_MAX_ATIME_RM_WEBSUBMIT_CKEDITOR_FILE, vstr)) write_message("- deleting old temporary files attached with BibEdit") gc_exec_command('find %s -name "bibedit*.tmp"' ' -atime +%s -exec rm %s -f {} \;' \ % (CFG_TMPSHAREDDIR + '/bibedit-cache/', CFG_MAX_ATIME_BIBEDIT_TMP, vstr)) write_message("""CLEANING OF TMP FILES FINISHED""") def clean_cache(): """Clean the cache for expired and old files.""" write_message("""CLEANING OF OLD CACHED RSS REQUEST STARTED""") rss_cache_dir = "%s/rss/" % CFG_CACHEDIR try: filenames = os.listdir(rss_cache_dir) except OSError: filenames = [] count = 0 for filename in filenames: filename = os.path.join(rss_cache_dir, filename) last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(filename)).st_mtime) if not (datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL)): try: os.remove(filename) count += 1 except OSError, e: write_message("Error: %s" % e) write_message("""%s rss cache file pruned out of %s.""" % (count, len(filenames))) write_message("""CLEANING OF OLD CACHED RSS REQUEST FINISHED""") write_message("""CLEANING OF OLD CACHED WEBJOURNAL FILES STARTED""") webjournal_cache_dir = "%s/webjournal/" % CFG_CACHEDIR + filenames = [] try: - filenames = os.listdir(webjournal_cache_dir) + for root, dummy, files in os.walk(webjournal_cache_dir): + filenames.extend(os.path.join(root, filename) for filename in files) except OSError: - filenames = [] + pass count = 0 for filename in filenames: filename = os.path.join(webjournal_cache_dir, filename) last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(filename)).st_mtime) if not (datetime.datetime.now() < last_update_time + datetime.timedelta(days=CFG_WEBJOURNAL_TTL)): try: os.remove(filename) count += 1 except OSError, e: write_message("Error: %s" % e) write_message("""%s webjournal cache file pruned out of %s.""" % (count, len(filenames))) write_message("""CLEANING OF OLD CACHED WEBJOURNAL FILES FINISHED""") def clean_bibxxx(): """ Clean unreferenced bibliographic values from bibXXx tables. This is useful to prettify browse results, as it removes old, no longer used values. WARNING: this function must be run only when no bibupload is running and/or sleeping. 
""" write_message("""CLEANING OF UNREFERENCED bibXXx VALUES STARTED""") for xx in range(0, 100): bibxxx = 'bib%02dx' % xx bibrec_bibxxx = 'bibrec_bib%02dx' % xx if task_get_option('verbose') >= 9: num_unref_values = run_sql("""SELECT COUNT(*) FROM %(bibxxx)s LEFT JOIN %(bibrec_bibxxx)s ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx, })[0][0] run_sql("""DELETE %(bibxxx)s FROM %(bibxxx)s LEFT JOIN %(bibrec_bibxxx)s ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx, }) if task_get_option('verbose') >= 9: write_message(""" - %d unreferenced %s values cleaned""" % \ (num_unref_values, bibxxx)) write_message("""CLEANING OF UNREFERENCED bibXXx VALUES FINISHED""") def clean_documents(): """Delete all the bibdocs that have been set as deleted and have not been modified since CFG_DELETED_BIBDOC_MAXLIFE days. Returns the number of bibdocs involved.""" write_message("""CLEANING OF OBSOLETED DELETED DOCUMENTS STARTED""") write_message("select id from bibdoc where status='DELETED' and NOW()>ADDTIME(modification_date, '%s 0:0:0')" % CFG_DELETED_BIBDOC_MAXLIFE, verbose=9) records = run_sql("select id from bibdoc where status='DELETED' and NOW()>ADDTIME(modification_date, '%s 0:0:0')", (CFG_DELETED_BIBDOC_MAXLIFE,)) for record in records: bibdoc = BibDoc.create_instance(record[0]) bibdoc.expunge() write_message("DELETE FROM bibdoc WHERE id=%i" % int(record[0]), verbose=9) run_sql("DELETE FROM bibdoc WHERE id=%s", (record[0],)) write_message("""%s obsoleted deleted documents cleaned""" % len(records)) write_message("""CLEANING OF OBSOLETED DELETED DOCUMENTS FINISHED""") return len(records) def check_tables(): """ Check all DB tables. Useful to run from time to time when the site is idle, say once a month during a weekend night. FIXME: should produce useful output about outcome. """ res = run_sql("SHOW TABLES") for row in res: table_name = row[0] write_message("checking table %s" % table_name) run_sql("CHECK TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql def optimise_tables(): """ Optimise all DB tables to defragment them in order to increase DB performance. Useful to run from time to time when the site is idle, say once a month during a weekend night. FIXME: should produce useful output about outcome. """ res = run_sql("SHOW TABLES") for row in res: table_name = row[0] write_message("optimising table %s" % table_name) run_sql("OPTIMIZE TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql def guest_user_garbage_collector(): """Session Garbage Collector program flow/tasks: 1: delete expired sessions 1b:delete guest users without session 2: delete queries not attached to any user 3: delete baskets not attached to any user 4: delete alerts not attached to any user 5: delete expired mailcookies 5b: delete expired not confirmed email address 6: delete expired roles memberships verbose - level of program output. 
0 - nothing 1 - default 9 - max, debug""" # dictionary used to keep track of number of deleted entries delcount = {'session': 0, 'user': 0, 'user_query': 0, 'query': 0, 'bskBASKET': 0, 'user_bskBASKET': 0, 'bskREC': 0, 'bskRECORDCOMMENT': 0, 'bskEXTREC': 0, 'bskEXTFMT': 0, 'user_query_basket': 0, 'mail_cookie': 0, 'email_addresses': 0, 'role_membership' : 0} write_message("CLEANING OF GUEST SESSIONS STARTED") # 1 - DELETE EXPIRED SESSIONS write_message("- deleting expired sessions") timelimit = convert_datestruct_to_datetext(time.gmtime()) write_message(" DELETE FROM session WHERE" " session_expiry < %s \n" % (timelimit,), verbose=9) delcount['session'] += run_sql("DELETE FROM session WHERE" " session_expiry < %s """, (timelimit,)) # 1b - DELETE GUEST USERS WITHOUT SESSION write_message("- deleting guest users without session") # get uids write_message(""" SELECT u.id\n FROM user AS u LEFT JOIN session AS s\n ON u.id = s.uid\n WHERE s.uid IS NULL AND u.email = ''""", verbose=9) result = run_sql("""SELECT u.id FROM user AS u LEFT JOIN session AS s ON u.id = s.uid WHERE s.uid IS NULL AND u.email = ''""") write_message(result, verbose=9) if result: # work on slices of result list in case of big result for i in range(0, len(result), CFG_MYSQL_ARGUMENTLIST_SIZE): # create string of uids uidstr = '' for (id_user,) in result[i:i + CFG_MYSQL_ARGUMENTLIST_SIZE]: if uidstr: uidstr += ',' uidstr += "%s" % (id_user,) # delete users write_message(" DELETE FROM user WHERE" " id IN (TRAVERSE LAST RESULT) AND email = '' \n", verbose=9) delcount['user'] += run_sql("DELETE FROM user WHERE" " id IN (%s) AND email = ''" % (uidstr,)) # 2 - DELETE QUERIES NOT ATTACHED TO ANY USER # first step, delete from user_query write_message("- deleting user_queries referencing" " non-existent users") # find user_queries referencing non-existent users write_message(" SELECT DISTINCT uq.id_user\n" " FROM user_query AS uq LEFT JOIN user AS u\n" " ON uq.id_user = u.id\n WHERE u.id IS NULL", verbose=9) result = run_sql("""SELECT DISTINCT uq.id_user FROM user_query AS uq LEFT JOIN user AS u ON uq.id_user = u.id WHERE u.id IS NULL""") write_message(result, verbose=9) # delete in user_query one by one write_message(" DELETE FROM user_query WHERE" " id_user = 'TRAVERSE LAST RESULT' \n", verbose=9) for (id_user,) in result: delcount['user_query'] += run_sql("""DELETE FROM user_query WHERE id_user = %s""" % (id_user,)) # delete the actual queries write_message("- deleting queries not attached to any user") # select queries that must be deleted write_message(""" SELECT DISTINCT q.id\n FROM query AS q LEFT JOIN user_query AS uq\n ON uq.id_query = q.id\n WHERE uq.id_query IS NULL AND\n q.type <> 'p' """, verbose=9) result = run_sql("""SELECT DISTINCT q.id FROM query AS q LEFT JOIN user_query AS uq ON uq.id_query = q.id WHERE uq.id_query IS NULL AND q.type <> 'p'""") write_message(result, verbose=9) # delete queries one by one write_message(""" DELETE FROM query WHERE id = 'TRAVERSE LAST RESULT \n""", verbose=9) for (id_user,) in result: delcount['query'] += run_sql("""DELETE FROM query WHERE id = %s""", (id_user,)) # 3 - DELETE BASKETS NOT OWNED BY ANY USER write_message("- deleting baskets not owned by any user") # select basket ids write_message(""" SELECT ub.id_bskBASKET\n FROM user_bskBASKET AS ub LEFT JOIN user AS u\n ON u.id = ub.id_user\n WHERE u.id IS NULL""", verbose=9) try: result = run_sql("""SELECT ub.id_bskBASKET FROM user_bskBASKET AS ub LEFT JOIN user AS u ON u.id = ub.id_user WHERE u.id IS NULL""") except: result = [] 
write_message(result, verbose=9) # delete from user_basket and basket one by one write_message(""" DELETE FROM user_bskBASKET WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' """, verbose=9) write_message(""" DELETE FROM bskBASKET WHERE id = 'TRAVERSE LAST RESULT' """, verbose=9) write_message(""" DELETE FROM bskREC WHERE id_bskBASKET = 'TRAVERSE LAST RESULT'""", verbose=9) write_message(""" DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = 'TRAVERSE LAST RESULT' \n""", verbose=9) for (id_basket,) in result: delcount['user_bskBASKET'] += run_sql("""DELETE FROM user_bskBASKET WHERE id_bskBASKET = %s""", (id_basket,)) delcount['bskBASKET'] += run_sql("""DELETE FROM bskBASKET WHERE id = %s""", (id_basket,)) delcount['bskREC'] += run_sql("""DELETE FROM bskREC WHERE id_bskBASKET = %s""", (id_basket,)) delcount['bskRECORDCOMMENT'] += run_sql("""DELETE FROM bskRECORDCOMMENT WHERE id_bskBASKET = %s""", (id_basket,)) write_message(""" SELECT DISTINCT ext.id, rec.id_bibrec_or_bskEXTREC FROM bskEXTREC AS ext \nLEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC WHERE id_bibrec_or_bskEXTREC is NULL""", verbose=9) try: result = run_sql("""SELECT DISTINCT ext.id FROM bskEXTREC AS ext LEFT JOIN bskREC AS rec ON ext.id=-rec.id_bibrec_or_bskEXTREC WHERE id_bibrec_or_bskEXTREC is NULL""") except: result = [] write_message(result, verbose=9) write_message(""" DELETE FROM bskEXTREC WHERE id = 'TRAVERSE LAST RESULT' """, verbose=9) write_message(""" DELETE FROM bskEXTFMT WHERE id_bskEXTREC = 'TRAVERSE LAST RESULT' \n""", verbose=9) for (id_basket,) in result: delcount['bskEXTREC'] += run_sql("""DELETE FROM bskEXTREC WHERE id=%s""", (id_basket,)) delcount['bskEXTFMT'] += run_sql("""DELETE FROM bskEXTFMT WHERE id_bskEXTREC=%s""", (id_basket,)) # 4 - DELETE ALERTS NOT OWNED BY ANY USER write_message('- deleting alerts not owned by any user') # select user ids in uqb that reference non-existent users write_message("""SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""", verbose=9) result = run_sql("""SELECT DISTINCT uqb.id_user FROM user_query_basket AS uqb LEFT JOIN user AS u ON uqb.id_user = u.id WHERE u.id IS NULL""") write_message(result, verbose=9) # delete all these entries for (id_user,) in result: write_message("""DELETE FROM user_query_basket WHERE id_user = 'TRAVERSE LAST RESULT """, verbose=9) delcount['user_query_basket'] += run_sql("""DELETE FROM user_query_basket WHERE id_user = %s """, (id_user,)) # 5 - delete expired mailcookies write_message("""mail_cookie_gc()""", verbose=9) delcount['mail_cookie'] = mail_cookie_gc() ## 5b - delete expired not confirmed email address write_message("""DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')""" % CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS, verbose=9) delcount['email_addresses'] = run_sql("""DELETE FROM user WHERE note='2' AND NOW()>ADDTIME(last_login, '%s 0:0:0')""", (CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS,)) # 6 - delete expired roles memberships write_message("""DELETE FROM user_accROLE WHERE expiration