diff --git a/.gitignore b/.gitignore index 54535e987..b1f08d07d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,91 +1,92 @@ .version Makefile Makefile.in configure config.cache config.log config.status config.nice config.guess config.sub install-sh missing autom4te.cache aclocal.m4 TAGS invenio-autotools.conf po/POTFILES po/POTFILES-py po/POTFILES-webdoc po/stamp-po po/*.gmo po/*.mo po/*.sed *~ *.pyc *.clisp.mem *.cmucl.core *.sbcl.core *.fas *.fasl *.sse2f *.lib *.x86f modules/webauthorprofile/bin/webauthorprofile modules/bibauthorid/bin/bibauthorid modules/bibcirculation/bin/bibcircd modules/bibclassify/bin/bibclassify modules/bibconvert/bin/bibconvert +modules/bibdocfile/bin/bibdocfile modules/bibedit/bin/bibedit modules/refextract/bin/refextract modules/bibrecord/bin/textmarc2xmlmarc modules/bibrecord/bin/xmlmarc2textmarc modules/bibrecord/bin/xmlmarclint modules/bibencode/bin/bibencode modules/bibexport/bin/bibexport modules/bibformat/bin/bibreformat modules/oaiharvest/bin/oaiharvest modules/oairepository/bin/oairepositoryupdater modules/bibindex/bin/bibindex modules/bibindex/bin/bibstat modules/bibmatch/bin/bibmatch modules/bibrank/bin/bibrank modules/bibrank/bin/bibrankgkb modules/bibrank/etc/bibrankgkb.cfg modules/bibrank/etc/demo_jif.cfg modules/bibrank/etc/template_single_tag_rank_method.cfg modules/bibsched/bin/bibsched modules/bibsched/bin/bibtaskex modules/bibsched/bin/bibtasklet modules/bibsort/bin/bibsort modules/bibsword/bin/bibsword modules/bibupload/bin/batchuploader modules/bibupload/bin/bibupload modules/elmsubmit/bin/elmsubmit modules/elmsubmit/etc/elmsubmit.cfg modules/miscutil/bin/dbdump modules/miscutil/bin/dbexec modules/miscutil/bin/inveniocfg modules/miscutil/bin/plotextractor modules/miscutil/etc/bash_completion.d/inveniocfg modules/miscutil/lib/build modules/webaccess/bin/authaction modules/webaccess/bin/webaccessadmin modules/webalert/bin/alertengine modules/webmessage/bin/webmessageadmin modules/websearch/bin/webcoll 
modules/websession/bin/inveniogc modules/webstat/bin/webstat modules/webstat/bin/webstatadmin modules/webstyle/bin/webdoc modules/websubmit/bin/bibdocfile modules/websubmit/bin/inveniounoconv tags config.status.lineno configure.lineno *.kdevelop *.kdevses .project .settings .pydevproject org.eclipse.core.resources.prefs diff --git a/Makefile.am b/Makefile.am index 15762d69f..c7699d399 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,623 +1,624 @@ ## This file is part of Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
confignicedir = $(sysconfdir)/build confignice_SCRIPTS=config.nice SUBDIRS = po config modules EXTRA_DIST = UNINSTALL THANKS RELEASE-NOTES configure-tests.py config.nice.in \ config.rpath # current MathJax version and packages # See also modules/miscutil/lib/htmlutils.py (get_mathjax_header) MJV = 1.1 MATHJAX = https://github.com/mathjax/MathJax/zipball/v$(MJV) # current CKeditor version CKV = 3.6.2 CKEDITOR = ckeditor_$(CKV).zip # current MediaElement.js version MEV = master MEDIAELEMENT = http://github.com/johndyer/mediaelement/zipball/$(MEV) #for solrutils INVENIO_JAVA_PATH = org/invenio_software/solr solrdirname = apache-solr-3.1.0 solrdir = $(prefix)/lib/$(solrdirname) solrutils_dir=$(CURDIR)/modules/miscutil/lib/solrutils CLASSPATH=.:${solrdir}/dist/solrj-lib/commons-io-1.4.jar:${solrdir}/dist/apache-solr-core-*jar:${solrdir}/contrib/jzlib-1.0.7.jar:${solrdir}/dist/apache-solr-solrj-3.1.0.jar:${solrdir}/dist/solrj-lib/slf4j-api-1.5.5.jar:${solrdir}/dist/*:${solrdir}/contrib/basic-lucene-libs/*:${solrdir}/contrib/analysis-extras/lucene-libs/*:${solrdir}/dist/solrj-lib/* # git-version-get stuff: BUILT_SOURCES = $(top_srcdir)/.version $(top_srcdir)/.version: echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: echo $(VERSION) > $(distdir)/.tarball-version check-custom-templates: $(PYTHON) $(top_srcdir)/modules/webstyle/lib/template.py --check-custom-templates $(top_srcdir) kwalitee-check: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --stats $(top_srcdir) kwalitee-check-errors-only: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-errors $(top_srcdir) kwalitee-check-variables: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-variables $(top_srcdir) kwalitee-check-indentation: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-indentation $(top_srcdir) kwalitee-check-sql-queries: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-sql $(top_srcdir) etags: \rm -f $(top_srcdir)/TAGS (cd 
$(top_srcdir) && find $(top_srcdir) -name "*.py" -print | xargs etags) install-data-local: for d in / /cache /cache/RTdata /log /tmp /tmp-shared /data /run /tmp-shared/bibencode/jobs/done ; do \ mkdir -p $(localstatedir)$$d ; \ done @echo "************************************************************" @echo "** Invenio software has been successfully installed! **" @echo "** **" @echo "** You may proceed to customizing your installation now. **" @echo "************************************************************" install-mathjax-plugin: @echo "***********************************************************" @echo "** Installing MathJax plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-mathjax-plugin mkdir /tmp/invenio-mathjax-plugin mkdir -p ${prefix}/var/www/MathJax (cd /tmp/invenio-mathjax-plugin && \ wget '$(MATHJAX)' -O mathjax.zip --no-check-certificate && \ unzip -q mathjax.zip && cd mathjax-MathJax-* && cp -ur * \ ${prefix}/var/www/MathJax) rm -fr /tmp/invenio-mathjax-plugin @echo "************************************************************" @echo "** The MathJax plugin was successfully installed. **" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS in invenio.conf. **" @echo "************************************************************" uninstall-mathjax-plugin: @rm -rvf ${prefix}/var/www/MathJax @echo "***********************************************************" @echo "** The MathJax plugin was successfully uninstalled. **" @echo "***********************************************************" install-jscalendar-plugin: @echo "***********************************************************" @echo "** Installing jsCalendar plugin, please wait... 
**" @echo "***********************************************************" rm -rf /tmp/invenio-jscalendar-plugin mkdir /tmp/invenio-jscalendar-plugin (cd /tmp/invenio-jscalendar-plugin && \ wget 'http://www.dynarch.com/static/jscalendar-1.0.zip' && \ unzip -u jscalendar-1.0.zip && \ mkdir -p ${prefix}/var/www/jsCalendar && \ cp jscalendar-1.0/img.gif ${prefix}/var/www/jsCalendar/jsCalendar.gif && \ cp jscalendar-1.0/calendar.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-setup.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/lang/calendar-en.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-blue.css ${prefix}/var/www/jsCalendar/) rm -fr /tmp/invenio-jscalendar-plugin @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully installed. **" @echo "***********************************************************" uninstall-jscalendar-plugin: @rm -rvf ${prefix}/var/www/jsCalendar @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully uninstalled. **" @echo "***********************************************************" install-jquery-plugins: @echo "***********************************************************" @echo "** Installing various jQuery plugins, please wait... 
**" @echo "***********************************************************" mkdir -p ${prefix}/var/www/js mkdir -p $(prefix)/var/www/css (cd ${prefix}/var/www/js && \ wget http://code.jquery.com/jquery-1.7.1.min.js && \ mv jquery-1.7.1.min.js jquery.min.js && \ wget http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.17/jquery-ui.min.js && \ wget http://invenio-software.org/download/jquery/v1.5/js/jquery.jeditable.mini.js && \ wget https://raw.github.com/malsup/form/master/jquery.form.js --no-check-certificate && \ wget http://jquery-multifile-plugin.googlecode.com/svn/trunk/jquery.MultiFile.pack.js && \ wget -O jquery.tablesorter.zip http://invenio-software.org/download/jquery/jquery.tablesorter.20111208.zip && \ wget http://invenio-software.org/download/jquery/uploadify-v2.1.4.zip -O uploadify.zip && \ wget http://www.datatables.net/download/build/jquery.dataTables.min.js && \ wget http://invenio-software.org/download/jquery/jquery.bookmark.package-1.4.0.zip && \ unzip jquery.tablesorter.zip && \ rm jquery.tablesorter.zip && \ rm -rf uploadify && \ unzip -u uploadify.zip -d uploadify && \ wget http://flot.googlecode.com/files/flot-0.6.zip && \ wget http://trentrichardson.com/examples/timepicker/jquery-ui-timepicker-addon.js && \ unzip -u flot-0.6.zip && \ mv flot/jquery.flot.selection.min.js flot/jquery.flot.min.js flot/excanvas.min.js ./ && \ rm flot-0.6.zip && rm -r flot && \ mv uploadify/swfobject.js ./ && \ mv uploadify/cancel.png uploadify/uploadify.css uploadify/uploadify.allglyphs.swf uploadify/uploadify.fla uploadify/uploadify.swf ../img/ && \ mv uploadify/jquery.uploadify.v2.1.4.min.js ./jquery.uploadify.min.js && \ rm uploadify.zip && rm -r uploadify && \ wget --no-check-certificate https://github.com/douglascrockford/JSON-js/raw/master/json2.js && \ wget https://raw.github.com/jeresig/jquery.hotkeys/master/jquery.hotkeys.js --no-check-certificate && \ wget http://jquery.bassistance.de/treeview/jquery.treeview.zip && \ unzip jquery.treeview.zip -d 
jquery-treeview && \ rm jquery.treeview.zip && \ wget http://invenio-software.org/download/jquery/v1.5/js/jquery.ajaxPager.js && \ unzip jquery.bookmark.package-1.4.0.zip && \ rm -f jquery.bookmark.ext.* bookmarks-big.png bookmarkBasic.html jquery.bookmark.js jquery.bookmark.pack.js && \ mv bookmarks.png ../img/ && \ mv jquery.bookmark.css ../css/ && \ rm -f jquery.bookmark.package-1.4.0.zip && \ mkdir -p ${prefix}/var/www/img && \ cd ${prefix}/var/www/img && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/base/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/smoothness/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/ && \ wget --no-check-certificate -O datatables_jquery-ui.css https://github.com/DataTables/DataTables/raw/master/media/css/demo_table_jui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/jquery-ui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.17/demos/images/calendar.gif && \ wget -r -np -nH --cut-dirs=5 -A "png" http://jquery-ui.googlecode.com/svn/tags/1.8.17/themes/redmond/images/) @echo "***********************************************************" @echo "** The jQuery plugins were successfully installed. 
**" @echo "***********************************************************" uninstall-jquery-plugins: (cd ${prefix}/var/www/js && \ rm -f jquery.min.js && \ rm -f jquery.MultiFile.pack.js && \ rm -f jquery.jeditable.mini.js && \ rm -f jquery.flot.selection.min.js && \ rm -f jquery.flot.min.js && \ rm -f excanvas.min.js && \ rm -f jquery-ui-timepicker-addon.min.js && \ rm -f jquery.tablesorter.js && \ rm -f jquery.tablesorter.pager.js && \ rm -f json2.js && \ rm -f jquery.uploadify.min.js && \ rm -rf tablesorter && \ rm -rf jquery-treeview && \ rm -f jquery.ajaxPager.js && \ rm -f jquery.form.js && \ rm -f jquery.dataTables.min.js && \ rm -f ui.core.js && \ rm -f jquery.bookmark.min.js && \ rm -f jquery.hotkeys.js && \ rm -f jquery.tablesorter.min.js && \ rm -f jquery-ui-1.7.3.custom.min.js && \ rm -f jquery.metadata.js && \ rm -f jquery-latest.js && \ rm -f jquery-ui.min.js) (cd ${prefix}/var/www/img && \ rm -f cancel.png uploadify.css uploadify.swf uploadify.allglyphs.swf uploadify.fla && \ rm -f datatables_jquery-ui.css \ rm -f bookmarks.png) && \ (cd ${prefix}/var/www/css && \ rm -f jquery.bookmark.css) @echo "***********************************************************" @echo "** The jquery plugins were successfully uninstalled. **" @echo "***********************************************************" install-ckeditor-plugin: @echo "***********************************************************" @echo "** Installing CKeditor plugin, please wait... **" @echo "***********************************************************" rm -rf ${prefix}/lib/python/invenio/ckeditor/ rm -rf /tmp/invenio-ckeditor-plugin mkdir /tmp/invenio-ckeditor-plugin (cd /tmp/invenio-ckeditor-plugin && \ wget 'http://download.cksource.com/CKEditor/CKEditor/CKEditor%20$(CKV)/$(CKEDITOR)' && \ unzip -u -d ${prefix}/var/www $(CKEDITOR)) && \ find ${prefix}/var/www/ckeditor/ -depth -name '_*' -exec rm -rf {} \; && \ find ${prefix}/var/www/ckeditor/ckeditor* -maxdepth 0 ! 
-name "ckeditor.js" -exec rm -r {} \; && \ rm -fr /tmp/invenio-ckeditor-plugin @echo "* Installing Invenio-specific CKeditor config..." (cd $(top_srcdir)/modules/webstyle/etc && make install) @echo "***********************************************************" @echo "** The CKeditor plugin was successfully installed. **" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR in invenio.conf. **" @echo "***********************************************************" uninstall-ckeditor-plugin: @rm -rvf ${prefix}/var/www/ckeditor @rm -rvf ${prefix}/lib/python/invenio/ckeditor @echo "***********************************************************" @echo "** The CKeditor plugin was successfully uninstalled. **" @echo "***********************************************************" install-pdfa-helper-files: @echo "***********************************************************" @echo "** Installing PDF/A helper files, please wait... **" @echo "***********************************************************" wget 'http://invenio-software.org/download/invenio-demo-site-files/ISOCoatedsb.icc' -O ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully installed. **" @echo "***********************************************************" install-mediaelement: @echo "***********************************************************" @echo "** MediaElement.js, please wait... 
**" @echo "***********************************************************" rm -rf /tmp/mediaelement mkdir /tmp/mediaelement wget 'http://github.com/johndyer/mediaelement/zipball/master' -O '/tmp/mediaelement/mediaelement.zip' --no-check-certificate unzip -u -d '/tmp/mediaelement' '/tmp/mediaelement/mediaelement.zip' rm -rf ${prefix}/var/www/mediaelement mkdir ${prefix}/var/www/mediaelement mv /tmp/mediaelement/johndyer-mediaelement-*/build/* ${prefix}/var/www/mediaelement rm -rf /tmp/mediaelement @echo "***********************************************************" @echo "** MediaElement.js was successfully installed. **" @echo "***********************************************************" uninstall-pdfa-helper-files: rm -f ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully uninstalled. **" @echo "***********************************************************" #Solrutils allows automatic installation, running and searching of an external Solr index. install-solrutils: @echo "***********************************************************" @echo "** Installing Solrutils and solr, please wait... **" @echo "***********************************************************" cd $(prefix)/lib && \ if test -d apache-solr*; then echo A solr directory already exists in `pwd` . \ Please remove it manually, if you are sure it is not needed; exit 2; fi ; \ if test -f apache-solr*; then echo solr tarball already exists in `pwd` . 
\ Please remove it manually.; exit 2; fi ; \ wget http://www.apache.org/dist//lucene/solr/3.1.0/apache-solr-3.1.0.tgz && \ tar -xzf apache-solr-3.1.0.tgz && \ rm apache-solr-3.1.0.tgz cd $(solrdir)/contrib/ ;\ wget http://mirrors.ibiblio.org/pub/mirrors/maven2/com/jcraft/jzlib/1.0.7/jzlib-1.0.7.jar && \ cd $(solrdir)/contrib/ ;\ jar -xf ../example/webapps/solr.war WEB-INF/lib/lucene-core-3.1.0.jar ; \ if test -d basic-lucene-libs; then rm -rf basic-lucene-libs; fi ; \ mv WEB-INF/lib/ basic-lucene-libs ; \ cp $(solrutils_dir)/schema.xml $(solrdir)/example/solr/conf/ cp $(solrutils_dir)/solrconfig.xml $(solrdir)/example/solr/conf/ cd $(solrutils_dir) && \ javac -classpath $(CLASSPATH) -d $(solrdir)/contrib @$(solrutils_dir)/java_sources.txt && \ cd $(solrdir)/contrib/ && \ jar -cf invenio-solr.jar org/invenio_software/solr/*class update-v0.3.0-tables update-v0.3.1-tables: echo "ALTER TABLE idxINDEXNAME CHANGE id_idxINDEX id_idxINDEX mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHODNAME CHANGE id_rnkMETHOD id_rnkMETHOD mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE collectionname CHANGE id_collection id_collection mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE formatname CHANGE id_format id_format mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE fieldname CHANGE id_field id_field mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runbibrank','run BibRank','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgbibrank','configure BibRank','','no');" | ${prefix}/bin/dbexec update-v0.3.2-tables: echo "ALTER TABLE sbmCOLLECTION_sbmDOCTYPE CHANGE id_son id_son char(10) NOT NULL default '0';" | ${prefix}/bin/dbexec update-v0.3.3-tables: ${prefix}/bin/dbexec < 
$(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE flxLINKTYPEPARAMS CHANGE pname pname varchar(78) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHOD DROP star_category_ranges;" | ${prefix}/bin/dbexec echo "DROP TABLE rnkSET;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE arguments arguments LONGTEXT;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE status status varchar(50);" | ${prefix}/bin/dbexec update-v0.5.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE session ADD INDEX uid (uid);" | ${prefix}/bin/dbexec echo "UPDATE idxINDEXNAME SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE rnkMETHODNAME SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE collectionname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE collection_portalbox SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE formatname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE fieldname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE idxINDEXNAME SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE rnkMETHODNAME SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE collectionname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE collection_portalbox SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE formatname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE fieldname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec update-v0.7.1-tables: echo "DROP TABLE oaiHARVEST;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgbibharvest','configure BibHarvest','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runoaiharvest','run BibHarvest 
oaiharvest','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgwebcomment','configure WebComment','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runoaiarchive','run BibHarvest oaiarchive','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runbibedit','run BibEdit','','no');" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD nickname varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD last_login datetime NOT NULL default '0000-00-00 00:00:00';" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD INDEX nickname (nickname);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmFIELD CHANGE subname subname varchar(13) default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE user_query_basket CHANGE alert_name alert_name varchar(30) NOT NULL default '';" | ${prefix}/bin/dbexec echo "TRUNCATE TABLE session;" | ${prefix}/bin/dbexec @echo "**********************************************************" @echo "** Do not forget to run the basket migration now: **" @echo "** @PYTHON@ modules/webbasket/lib/webbasket_migration_kit.py " @echo "** Please see the RELEASE-NOTES for details. 
**" @echo "**********************************************************" @echo "INSERT INTO oaiARCHIVE (id, setName, setSpec, setDescription, setDefinition, setRecList) SELECT id, setName, setSpec, CONCAT_WS('', setDescription), setDefinition, setRecList FROM oaiSET;" update-v0.90.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE format ADD COLUMN (description varchar(255) default '');" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN (content_type varchar(255) default '');" | ${prefix}/bin/dbexec update-v0.90.1-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE schTASK ADD INDEX status (status);" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD INDEX runtime (runtime);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD COLUMN score TINYINT UNSIGNED NOT NULL DEFAULT 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD PRIMARY KEY (doctype, sname);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD KEY doctype (doctype);" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST ADD COLUMN setspecs TEXT NOT NULL DEFAULT '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE setDescription setDescription text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p1 p1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f1 f1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m1 m1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p2 p2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f2 f2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m2 m2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p3 p3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f3 f3 
text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m3 m3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=0 WHERE status='';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=1 WHERE status='deleted';" | ${prefix}/bin/dbexec echo "ALTER TABLE fmtKNOWLEDGEBASES add COLUMN kbtype char default NULL;" | ${prefix}/bin/dbexec update-v0.92.0-tables: echo "UPDATE bibdoc SET status=0 WHERE status='';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=1 WHERE status='deleted';" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE arguments arguments mediumblob;" | ${prefix}/bin/dbexec echo "UPDATE user SET note=1 WHERE nickname='admin' AND note IS NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup CHANGE name name varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup ADD login_method varchar(255) NOT NULL default 'INTERNAL';" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup ADD UNIQUE KEY login_method_name (login_method(70), name);" | ${prefix}/bin/dbexec echo "ALTER TABLE user CHANGE settings settings blob default NULL;" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Recid', 'This function gets the recid for a document with a given report-number (as stored in the global variable rn).');" | ${prefix}/bin/dbexec update-v0.92.1-tables: echo "DROP TABLE rnkCITATIONDATA;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "UPDATE bibdoc SET status='DELETED' WHERE status='1';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status='' WHERE status='0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibrec ADD KEY creation_date (creation_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibrec ADD KEY modification_date (modification_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY creation_date (creation_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY modification_date 
(modification_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY docname (docname);" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST CHANGE postprocess postprocess varchar(20) NOT NULL default 'h';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST ADD COLUMN bibfilterprogram varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE idxINDEXNAME CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE idxINDEX ADD COLUMN stemming_language VARCHAR(10) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHODNAME CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkDOWNLOADS CHANGE id_bibdoc id_bibdoc mediumint(9) unsigned default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkDOWNLOADS CHANGE file_format file_format varchar(10) NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE collectionname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection_portalbox CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN visibility TINYINT NOT NULL default 1;" | ${prefix}/bin/dbexec echo "ALTER TABLE formatname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE fieldname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE accROLE ADD COLUMN firerole_def_ser blob NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE accROLE ADD COLUMN firerole_def_src text NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE user_accROLE ADD COLUMN expiration datetime NOT NULL default '9999-12-31 23:59:59';" | ${prefix}/bin/dbexec echo "ALTER TABLE user DROP INDEX id, ADD PRIMARY KEY id (id);" | ${prefix}/bin/dbexec echo -e 'from invenio.dbquery import run_sql;\ map(lambda index_id: run_sql("ALTER TABLE idxPHRASE%02dF CHANGE term term TEXT NULL DEFAULT NULL, DROP INDEX term, ADD INDEX term (term (50))" % index_id[0]), 
run_sql("select id from idxINDEX"))' | $(PYTHON) echo "INSERT INTO rnkCITATIONDATA VALUES (1,'citationdict','','');" | ${prefix}/bin/dbexec echo "INSERT INTO rnkCITATIONDATA VALUES (2,'reversedict','','');" | ${prefix}/bin/dbexec echo "INSERT INTO rnkCITATIONDATA VALUES (3,'selfcitdict','','');" | ${prefix}/bin/dbexec update-v0.99.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE bibdoc ADD COLUMN more_info mediumblob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD COLUMN priority tinyint(4) NOT NULL default 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD KEY priority (priority);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA DROP PRIMARY KEY;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD PRIMARY KEY (id);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA CHANGE id id mediumint(8) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD UNIQUE KEY object_name (object_name);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE value value text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmAPPROVAL ADD note text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstDOCUMENT CHANGE docsize docsize bigint(15) unsigned NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtACTIONHISTORY CHANGE client_host client_host int(10) unsigned default NULL;" | ${prefix}/bin/dbexec update-v0.99.1-tables: @echo "Nothing to do; table structure did not change between v0.99.1 and v0.99.2." update-v0.99.2-tables: @echo "Nothing to do; table structure did not change between v0.99.2 and v0.99.3." update-v0.99.3-tables: @echo "Nothing to do; table structure did not change between v0.99.3 and v0.99.4." update-v0.99.4-tables: @echo "Nothing to do; table structure did not change between v0.99.4 and v0.99.5." 
update-v0.99.5-tables: # from v0.99.5 to v1.0.0-rc0 echo "RENAME TABLE oaiARCHIVE TO oaiREPOSITORY;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO knwKB (id,name,description,kbtype) SELECT id,name,description,'' FROM fmtKNOWLEDGEBASES;" | ${prefix}/bin/dbexec echo "INSERT INTO knwKBRVAL (id,m_key,m_value,id_knwKB) SELECT id,m_key,m_value,id_fmtKNOWLEDGEBASES FROM fmtKNOWLEDGEBASEMAPPINGS;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE name name varchar(40) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE docname docname varchar(250) COLLATE utf8_bin NOT NULL default 'file';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE status status text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD COLUMN text_extraction_date datetime NOT NULL default '0000-00-00';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection DROP COLUMN restricted;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bib85x DROP INDEX kv, ADD INDEX kv (value(100));" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/HEP.rdf' WHERE name='HEP' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/NASA-subjects.rdf' WHERE name='NASA-subjects' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='runoairepository', description='run oairepositoryupdater task' WHERE name='runoaiarchive';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='cfgoaiharvest', description='configure OAI Harvest' WHERE name='cfgbibharvest';" | ${prefix}/bin/dbexec echo "ALTER TABLE accARGUMENT CHANGE value value 
varchar(255);" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='doctype,act,categ' WHERE name='submit';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('categ','*');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='submit' and accARGUMENT.keyword='categ' and accARGUMENT.value='*';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='name,with_editor_rights' WHERE name='cfgwebjournal';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('with_editor_rights','yes');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='cfgwebjournal' and accARGUMENT.keyword='with_editor_rights' and accARGUMENT.value='yes';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC CHANGE id id int(15) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD external_id int(15) NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD collection_id int(15) unsigned NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD original_url text;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD status char(2) NOT NULL default 'ok';" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY status (status);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Photos_to_Storage','Attach/edit the pictures uploaded with the 
\"create_photos_manager_interface()\" function');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Photos',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a photos upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Photos_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\n\r\nfrom invenio.websubmit_functions.ParamFile import ParamFromFile\r\nfrom invenio.websubmit_functions.Move_Photos_to_Storage import read_param_file, create_photos_manager_interface, get_session_id\r\n\r\n# Retrieve session id\r\ntry:\r\n # User info is defined only in MBI/MPI actions...\r\n session_id = get_session_id(None, uid, user_info) \r\nexcept:\r\n session_id = get_session_id(req, uid, {})\r\n\r\n# Retrieve context\r\nindir = curdir.split(\'/\')[-3]\r\ndoctype = curdir.split(\'/\')[-2]\r\naccess = curdir.split(\'/\')[-1]\r\n\r\n# Get the record ID, if any\r\nsysno = ParamFromFile(\"%s/%s\" % (curdir,\'SN\')).strip()\r\n\r\n\"\"\"\r\nModify below the configuration of the photos manager interface.\r\nNote: \'can_reorder_photos\' parameter is not yet fully taken into consideration\r\n\r\nDocumentation of the function is available by running:\r\necho -e \'from invenio.websubmit_functions.Move_Photos_to_Storage import create_photos_manager_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext += create_photos_manager_interface(sysno, session_id, uid,\r\n doctype, indir, curdir, access,\r\n can_delete_photos=True,\r\n can_reorder_photos=True,\r\n can_upload_photos=True,\r\n editor_width=700,\r\n editor_height=400,\r\n initial_slider_value=100,\r\n max_slider_value=200,\r\n min_slider_value=80)','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES 
('Move_Photos_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nfrom invenio.websubmit_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.websubmit_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','forceFileRevision');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Upload_Files_Interface','Display generic interface to 
add/revise/delete files. To be used before function \"Move_Uploaded_Files_to_Storage\"');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Uploaded_Files_to_Storage','Attach files uploaded with \"Create_Upload_Files_Interface\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','elementNameToDoctype');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','keepPreviousVersionDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Revised_Files_to_Storage','Revise files initially uploaded with \"Move_Files_to_Storage\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','minsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','doctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDeleteDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canReviseDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDescribeDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canCommentDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES 
('Create_Upload_Files_Interface','canKeepDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canAddFormatDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRestrictDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canNameNewFiles');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','keepDefault');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','showLinks');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','fileLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','filenameLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','descriptionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','commentLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','startDoc');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','endDoc');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','defaultFilenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxFilesDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO 
sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','nblength');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_nb_length');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Get_Recid','record_search_pattern');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_FCKeditor_Files_to_Storage','Transfer files attached to the record with the FCKeditor');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_FCKeditor_Files_to_Storage','input_fields');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_doctypes');" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD round_name varchar(255) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD restriction varchar(50) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD KEY 
in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo -e 'from invenio.webcommentadminlib import migrate_comments_populate_threads_index;\ migrate_comments_populate_threads_index()' | $(PYTHON) echo -e 'from invenio.access_control_firerole import repair_role_definitions;\ repair_role_definitions()' | $(PYTHON) update-v1.0.0-rc0-tables: # from v1.0.0-rc0 to v1.0.0 @echo "Nothing to do; table structure did not change between v1.0.0-rc0 and v1.0.0." update-v1.0.0-tables: # from v1.0.0 to v1.0.1 @echo "Nothing to do; table structure did not change between v1.0.0 and v1.0.1." 
update-v1.0.1-tables: # from v1.0.1 to v1.0.2 @echo "ALTER TABLE session ADD KEY session_expiry (session_expiry);" | ${prefix}/bin/dbexec update-db-from-v1.0-to-v1.1: # update DB from v1.0 release series to v1.1 release series ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Set_Embargo','Set an embargo on all the documents of a given record.');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_Embargo','date_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_Embargo','date_format');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_role');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_flag');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconformat');" | ${prefix}/bin/dbexec echo "INSERT INTO format (name, code, description, content_type, visibility) VALUES ('Podcast', 'xp', 'Sample format suitable for multimedia feeds, such as podcasts', 'application/rss+xml', 0);" | ${prefix}/bin/dbexec echo "ALTER TABLE accMAILCOOKIE ADD INDEX expiration (expiration);" | ${prefix}/bin/dbexec echo "UPDATE sbmFUNDESC SET function='Move_CKEditor_Files_to_Storage' WHERE function='Move_FCKeditor_Files_to_Storage';" | ${prefix}/bin/dbexec echo "UPDATE sbmALLFUNCDESCR SET function='Move_CKEditor_Files_to_Storage', description='Transfer files attached to the record with the CKEditor' WHERE function='Move_FCKeditor_Files_to_Storage';" | ${prefix}/bin/dbexec echo "UPDATE sbmFUNCTIONS SET function='Move_CKEditor_Files_to_Storage' WHERE function='Move_FCKeditor_Files_to_Storage';" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE proc proc varchar(255) NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD sequenceid int(8) NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD INDEX sequenceid 
(sequenceid);" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK CHANGE proc proc varchar(255) NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK ADD sequenceid int(8) NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK ADD INDEX sequenceid (sequenceid);" | ${prefix}/bin/dbexec echo "ALTER TABLE session CHANGE session_object session_object longblob;" | ${prefix}/bin/dbexec echo "ALTER TABLE session CHANGE session_expiry session_expiry datetime NOT NULL default '0000-00-00 00:00:00';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiREPOSITORY CHANGE setSpec setSpec varchar(255) NOT NULL default 'GLOBAL_SET';" | ${prefix}/bin/dbexec echo "UPDATE oaiREPOSITORY SET setSpec='GLOBAL_SET' WHERE setSpec='';" | ${prefix}/bin/dbexec echo "ALTER TABLE user_query_basket ADD COLUMN alert_desc TEXT DEFAULT NULL AFTER alert_name;" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Link_Records','Link two records toghether via MARC');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Video_Processing',NULL);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Link_Records','edsrn');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Link_Records','edsrn2');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Link_Records','directRelationship');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Link_Records','reverseRelationship');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Video_Processing','aspect');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Video_Processing','batch_template');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Video_Processing','title');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Set_RN_From_Sysno', 'Set the value of global rn variable to the report number identified by sysno (recid)');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_RN_From_Sysno','edsrn');" | 
${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_RN_From_Sysno','rep_tags');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_RN_From_Sysno','record_search_pattern');" | ${prefix}/bin/dbexec echo "UPDATE externalcollection SET name='INSPIRE' where name='SPIRES HEP';" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Notify_URL','Access URL, possibly to post content');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','url');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','data');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','admin_emails');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','content_type');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','attempt_times');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','attempt_sleeptime');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Notify_URL','user');" | ${prefix}/bin/dbexec echo "ALTER TABLE bibfmt DROP COLUMN id;" | ${prefix}/bin/dbexec echo "ALTER TABLE bibfmt ADD PRIMARY KEY (id_bibrec, format);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibfmt DROP KEY id_bibrec;" | ${prefix}/bin/dbexec echo "ALTER TABLE bibfmt ADD KEY last_updated (last_updated);" | ${prefix}/bin/dbexec echo "ALTER TABLE user_query_basket ADD COLUMN alert_recipient TEXT DEFAULT NULL AFTER alert_desc;" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN last_updated datetime NOT NULL default '0000-00-00' AFTER visibility;" | ${prefix}/bin/dbexec + echo "REPLACE INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. 
Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nfrom invenio.bibdocfile_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.bibdocfile_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec CLEANFILES = *~ *.pyc *.tmp diff --git a/config/invenio-autotools.conf.in b/config/invenio-autotools.conf.in index 5d4e0cc40..abdb4480a 100644 --- a/config/invenio-autotools.conf.in +++ b/config/invenio-autotools.conf.in @@ -1,86 +1,88 @@ ## This file is part of Invenio. -## Copyright (C) 2008, 2009, 2010, 2011 CERN. +## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ## DO NOT EDIT THIS FILE. ## YOU SHOULD NOT EDIT THESE VALUES. THEY WERE AUTOMATICALLY ## CALCULATED BY AUTOTOOLS DURING THE "CONFIGURE" STAGE. [Invenio] ## Invenio version: CFG_VERSION = @VERSION@ ## directories detected from 'configure --prefix ...' parameters: CFG_PREFIX = @prefix@ CFG_BINDIR = @prefix@/bin CFG_PYLIBDIR = @prefix@/lib/python CFG_LOGDIR = @localstatedir@/log CFG_ETCDIR = @prefix@/etc CFG_LOCALEDIR = @prefix@/share/locale CFG_TMPDIR = @localstatedir@/tmp CFG_TMPSHAREDDIR = @localstatedir@/tmp-shared CFG_CACHEDIR = @localstatedir@/cache CFG_WEBDIR = @localstatedir@/www ## path to interesting programs: CFG_PATH_MYSQL = @MYSQL@ CFG_PATH_PHP = @PHP@ CFG_PATH_GZIP = @GZIP@ CFG_PATH_GUNZIP = @GUNZIP@ CFG_PATH_TAR = @TAR@ CFG_PATH_GFILE = @FILE@ CFG_PATH_CONVERT = @CONVERT@ CFG_PATH_PDFTOTEXT = @PDFTOTEXT@ CFG_PATH_PDFTK = @PDFTK@ CFG_PATH_PDFTOPS = @PDFTOPS@ CFG_PATH_PDF2PS = @PDF2PS@ CFG_PATH_PDFINFO = @PDFINFO@ CFG_PATH_PDFTOPPM = @PDFTOPPM@ CFG_PATH_PAMFILE = @PAMFILE@ CFG_PATH_GS = @GS@ CFG_PATH_PS2PDF = @PS2PDF@ CFG_PATH_PDFLATEX = @PDFLATEX@ CFG_PATH_PDFOPT = @PDFOPT@ CFG_PATH_PSTOTEXT = @PSTOTEXT@ CFG_PATH_PSTOASCII = @PSTOASCII@ CFG_PATH_ANY2DJVU = @ANY2DJVU@ CFG_PATH_DJVUPS = @DJVUPS@ CFG_PATH_DJVUTXT = @DJVUTXT@ CFG_PATH_TIFF2PDF = @TIFF2PDF@ 
CFG_PATH_OCROSCRIPT = @OCROSCRIPT@ CFG_PATH_OPENOFFICE_PYTHON = @OPENOFFICE_PYTHON@ CFG_PATH_WGET = @WGET@ CFG_PATH_MD5SUM = @MD5SUM@ CFG_PATH_FFMPEG = @FFMPEG@ CFG_PATH_FFPROBE = @FFPROBE@ CFG_PATH_MEDIAINFO = @MEDIAINFO@ ## CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE -- path to the stopwords file. You ## probably don't want to change this path, although you may want to ## change the content of that file. Note that the file is used by the ## rank engine internally, so it should be given even if stopword ## removal in the indexes is not used. CFG_BIBINDEX_PATH_TO_STOPWORDS_FILE = @prefix@/etc/bibrank/stopwords.kb ## helper style of variables for WebSubmit: CFG_WEBSUBMIT_COUNTERSDIR = @localstatedir@/data/submit/counters CFG_WEBSUBMIT_STORAGEDIR = @localstatedir@/data/submit/storage -CFG_WEBSUBMIT_FILEDIR = @localstatedir@/data/files CFG_WEBSUBMIT_BIBCONVERTCONFIGDIR = @prefix@/etc/bibconvert/config +## helper style of variables for BibDocFile: +CFG_BIBDOCFILE_FILEDIR = @localstatedir@/data/files + ## - end of file - diff --git a/config/invenio.conf b/config/invenio.conf index 1351aa2f2..de674d3e0 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,2021 +1,2018 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of a Invenio installation, as coming out of the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs, but this is not advised. ## ## If you would like to customize some of these parameters, you should ## rather create a file named 'invenio-local.conf' in the same ## directory where 'invenio.conf' lives and you should write there ## only the customizations that you want to be different from the ## vanilla defaults. ## ## Here is a realistic, minimalist, yet production-ready example of ## what you would typically put there: ## ## $ cat /opt/invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_NAME = John Doe's Document Server ## CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com ## CFG_DATABASE_HOST = localhost ## CFG_DATABASE_NAME = invenio ## CFG_DATABASE_USER = invenio ## CFG_DATABASE_PASS = my123p$ss ## ## You should override at least the parameters mentioned above and the ## parameters mentioned in the `Part 1: Essential parameters' below in ## order to define some very essential runtime parameters such as the ## name of your document server (CFG_SITE_NAME and ## CFG_SITE_NAME_INTL_*), the visible URL of your document server ## (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email address of the ## 
local Invenio administrator, comment moderator, and alert engine ## (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_ADMIN_EMAIL, etc), and last but ## not least your database credentials (CFG_DATABASE_*). ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have specified in ## your local file. This cascading of configuration parameters will ## ease your future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local Invenio administrator. ## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_PORT = 3306 CFG_DATABASE_NAME = invenio CFG_DATABASE_USER = invenio CFG_DATABASE_PASS = my123p$ss ## CFG_DATABASE_SLAVE - if you use DB replication, then specify the DB ## slave address credentials. (Assuming the same access rights to the ## DB slave as to the DB master.) If you don't use DB replication, ## then leave this option blank. CFG_DATABASE_SLAVE = ## CFG_SITE_URL - specify URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not leave ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify secure URL under which your ## installation secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not leave ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your Invenio installation. 
CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.) CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Институт Фиктивных Наук Атлантиды CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 CFG_SITE_NAME_INTL_hu = Kitalált Tudományok Atlantiszi Intézete CFG_SITE_NAME_INTL_af = Atlantis Instituut van Fiktiewe Wetenskap CFG_SITE_NAME_INTL_gl = Instituto Atlantis de Ciencia Fictive CFG_SITE_NAME_INTL_ro = Institutul Atlantis al Ştiinţelor Fictive CFG_SITE_NAME_INTL_rw = Atlantis Ishuri Rikuru Ry'ubuhanga CFG_SITE_NAME_INTL_ka = ატლანტიდის ფიქტიური მეცნიერების ინსტიტუტი CFG_SITE_NAME_INTL_lt = Fiktyvių Mokslų Institutas Atlantis CFG_SITE_NAME_INTL_ar = معهد أطلنطيس للعلوم الافتراضية ## CFG_SITE_LANG -- the default language of the interface: CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface 
should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. Currently supported languages ## include Afrikaans, Arabic, Bulgarian, Catalan, Czech, German, Georgian, ## Greek, English, Spanish, French, Croatian, Hungarian, Galician, ## Italian, Japanese, Kinyarwanda, Lithuanian, Norwegian, Polish, ## Portuguese, Romanian, Russian, Slovak, Swedish, Ukrainian, Chinese ## (China), Chinese (Taiwan), so that the eventual maximum you can ## currently select is ## "af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW". CFG_SITE_LANGS = af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = info@invenio-software.org ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and login with ## this address when using Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = info@invenio-software.org ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES -- list of email addresses to ## which an email should be sent in case of emergency (e.g. bibsched ## queue has been stopped because of an error). Configuration ## dictionary allows for different recipients based on weekday and ## time-of-day. Example: ## ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = { ## 'Sunday 22:00-06:00': '0041761111111@email2sms.foo.com', ## '06:00-18:00': 'team-in-europe@foo.com,0041762222222@email2sms.foo.com', ## '18:00-06:00': 'team-in-usa@foo.com', ## '*': 'john.doe.phone@foo.com'} ## ## If you want the emergency email notifications to always go to the ## same address, just use the wildcard line in the above example. 
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {} ## CFG_SITE_ADMIN_EMAIL_EXCEPTIONS -- set this to 0 if you do not want ## to receive any captured exception via email to CFG_SITE_ADMIN_EMAIL ## address. Captured exceptions will still be available in ## var/log/invenio.err file. Set this to 1 if you want to receive ## some of the captured exceptions (this depends on the actual place ## where the exception is captured). Set this to 2 if you want to ## receive all captured exceptions. CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1 ## CFG_SITE_RECORD -- what is the URI part representing detailed ## record pages? We recomment to leave the default value `record' ## unchanged. CFG_SITE_RECORD = record ## CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER -- set this to ## the number of seconds after which to reset the exception notification ## counter. A given repetitive exception is notified via email with a ## logarithmic strategy: the first time it is seen it is sent via email, ## then the second time, then the fourth, then the eighth and so forth. ## If the number of seconds elapsed since the last time it was notified ## is greater than CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER ## then the internal counter is reset in order not to have exception ## notification become more and more rare. CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400 ## CFG_CERN_SITE -- do we want to enable CERN-specific code? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_INSPIRE_SITE = 0 ## CFG_ADS_SITE -- do we want to enable ADS-specific code? ## Put "1" for "yes" and "0" for "no". CFG_ADS_SITE = 0 ## CFG_OPENAIRE_SITE -- do we want to enable OpenAIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_OPENAIRE_SITE = 0 ## CFG_DEVEL_SITE -- is this a development site? If it is, you might ## prefer that it does not do certain things. 
For example, you might ## not want WebSubmit to send certain emails or trigger certain ## processes on a development site. ## Put "1" for "yes" (this is a development site) or "0" for "no" ## (this isn't a development site.) CFG_DEVEL_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important one is ## the 'template skin' you would like to use and the obfuscation mode ## for your email addresses. Please refer to the WebStyle Admin Guide ## for more explanation. The other variables are listed here mostly ## for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE. How do we "protect" ## email addresses from undesired automated email harvesters? This ## setting will not affect 'support' and 'admin' emails. ## NOTE: there is no ultimate solution to protect against email ## harvesting. All have drawbacks and can more or less be ## circumvented. Choose you preferred mode ([t] means "transparent" ## for the user): ## -1: hide all emails. ## [t] 0 : no protection, email returned as is. ## foo@example.com => foo@example.com ## 1 : basic email munging: replaces @ by [at] and . by [dot] ## foo@example.com => foo [at] example [dot] com ## [t] 2 : transparent name mangling: characters are replaced by ## equivalent HTML entities. ## foo@example.com => foo@example.com ## [t] 3 : javascript insertion. Requires Javascript enabled on client ## side. ## 4 : replaces @ and . characters by gif equivalents. ## foo@example.com => foo [at] example [dot] com CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2 ## CFG_WEBSTYLE_INSPECT_TEMPLATES -- Do we want to debug all template ## functions so that they would return HTML results wrapped in ## comments indicating which part of HTML page was created by which ## template function? 
Useful only for debugging Pythonic HTML ## template. See WebStyle Admin Guide for more information. CFG_WEBSTYLE_INSPECT_TEMPLATES = 0 ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML ## left top box: CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global ## HTML left bottom box: CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global ## HTML right top box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global ## HTML right bottom box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ## CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST -- when certain HTTP status ## codes are raised to the WSGI handler, the corresponding exceptions ## and error messages can be sent to the system administrator for ## inspecting. This is useful to detect and correct errors. The ## variable represents a comma-separated list of HTTP statuses that ## should alert admin. Wildcards are possible. If the status is ## followed by an "r", it means that a referer is required to exist ## (useful to distinguish broken known links from URL typos when 404 ## errors are raised). CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = 404r,400,5*,41* ## CFG_WEBSTYLE_HTTP_USE_COMPRESSION -- whether to enable deflate ## compression of your HTTP/HTTPS connections. This will affect the Apache ## configuration snippets created by inveniocfg --create-apache-conf and ## the OAI-PMH Identify response. CFG_WEBSTYLE_HTTP_USE_COMPRESSION = 0 ## CFG_WEBSTYLE_REVERSE_PROXY_IPS -- if you are setting a multinode ## environment where an HTTP proxy such as mod_proxy is sitting in ## front of the Invenio web application and is forwarding requests to ## worker nodes, set here the the list of IP addresses of the allowed ## HTTP proxies. This is needed in order to avoid IP address spoofing ## when worker nodes are also available on the public Internet and ## might receive forged HTTP requests. 
Only HTTP requests coming from ## the specified IP addresses will be considered as forwarded from a ## reverse proxy. E.g. set this to '123.123.123.123'. CFG_WEBSTYLE_REVERSE_PROXY_IPS = ################################## ## Part 3: WebSearch parameters ## ################################## ## This section contains some configuration parameters for WebSearch ## module. Please note that WebSearch is mostly configured on ## run-time via its WebSearch Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. (Note that you may modify them ## afterwards too, though.) ## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries we want to ## cache in memory per one Apache httpd process? This cache is used ## mainly for "next/previous page" functionality, but it caches also ## "popular" user queries if more than one user happen to search for ## the same thing. Note that large numbers may lead to great memory ## consumption. We recommend a value not greater than 100. CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 0 ## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older ## system, you may want to map field codes of your old system (such as ## 'ti') to Invenio/MySQL ("title"). Use Python dictionary syntax ## for the translation table, e.g. {'wau':'author', 'wti':'title'}. ## Usually you don't want to do that, and you would use empty dict {}. CFG_WEBSEARCH_FIELDS_CONVERT = {} ## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the light search interface, in ## characters. CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60 ## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search ## pattern window in the simple search interface, in characters. 
CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40 ## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the advanced search interface, in ## characters. CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30 ## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still ## want to sort? For higher numbers we print only a warning and won't ## perform any sorting other than default 'latest records first', as ## sorting would be very time consuming then. We recommend a value of ## not more than a couple of thousands. CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000 ## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but ## it was not preformatted in the "HTML brief" format, do we want to ## call BibFormatting on the fly? Put "1" for "yes" and "0" for "no". ## Note that "1" will display the record exactly as if it were fully ## preformatted, but it may be slow due to on-the-fly processing; "0" ## will display a default format very fast, but it may not have all ## the fields as in the fully preformatted HTML brief format. Note ## also that this option is active only for old (PHP) formats; the new ## (Python) formats are called on the fly by default anyway, since ## they are much faster. When usure, please set "0" here. CFG_WEBSEARCH_CALL_BIBFORMAT = 0 ## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs ## visible rather than MySQL's record IDs? You may use this if you ## migrate from a different e-doc system, and you store your old ## system numbers into 970__a. Put "1" for "yes" and "0" for ## "no". Usually you don't want to do that, though. CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0 ## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- Put "1" if you want the ## "Latest Additions" in the web collection pages to show ## internationalized records. Useful only if your brief BibFormat ## templates contains internationalized strings. Otherwise put "0" in ## order not to slow down the creation of latest additions by WebColl. 
CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0 ## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display ## under 'Latest Additions' in the web collection pages. CFG_WEBSEARCH_INSTANT_BROWSE = 10 ## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to ## display in the RSS feed. CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25 ## CFG_WEBSEARCH_RSS_I18N_COLLECTIONS -- comma-separated list of ## collections that feature an internationalized RSS feed on their ## main seach interface page created by webcoll. Other collections ## will have RSS feed using CFG_SITE_LANG. CFG_WEBSEARCH_RSS_I18N_COLLECTIONS = ## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long ## a feed cache is valid. CFG_WEBSEARCH_RSS_TTL = 360 ## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of request kept ## in cache. If the cache is filled, following request are not cached. CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000 ## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names ## to print explicitely; for more print "et al". Note that this is ## used in default formatting that is seldomly used, as usually ## BibFormat defines all the format. The value below is only used ## when BibFormat fails, for example. CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3 ## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show or ## not collection grandsons in Narrow Search boxes (sons are shown by ## default, grandsons are configurable here). Use 0 for no and 1 for ## yes. CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1 ## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we ## create help links for Ellis, Nick or Ellis, Nicholas and friends ## when Ellis, N was searched for? Useful if you have one author ## stored in the database under several name formats, namely surname ## comma firstname and surname comma initial cataloging policy. Use 0 ## for no and 1 for yes. 
CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1 ## CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS -- MathJax is a JavaScript ## library that renders (La)TeX mathematical formulas in the client ## browser. This parameter must contain a comma-separated list of ## output formats for which to apply the MathJax rendering, for example ## "hb,hd". If the list is empty, MathJax is disabled. CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS = ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching ## external collections (e.g. SPIRES, CiteSeer, etc), how many seconds ## do we wait for reply before abandonning? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5 ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many ## results do we fetch? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10 ## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search ## results by collection or not? Use 0 for not, 1 for yes. CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1 ## CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS -- the default number of ## records to display per page in the search results pages. CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS = 10 ## CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS -- in order to limit denial of ## service attacks the total number of records per group displayed as a ## result of a search query will be limited to this number. Only the superuser ## queries will not be affected by this limit. CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200 ## CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL -- logged in users ## might have rights to access some restricted collections. This variable ## tweaks the kind of support the system will automatically provide to the ## user with respect to searching into these restricted collections. ## Set this to 0 in order to have the user to explicitly activate restricted ## collections in order to search into them. 
Set this to 1 in order to ## propose to the user the list of restricted collections to which he/she has ## rights (note: this is not yet implemented). Set this to 2 in order to ## silently add all the restricted collections to which the user has rights to ## to any query. ## Note: the system will discover which restricted collections a user has ## rights to, at login time. The time complexity of this procedure is ## proportional to the number of restricted collections. E.g. for a system ## with ~50 restricted collections, you might expect ~1s of delay in the ## login time, when this variable is set to a value higher than 0. CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL = 0 ## CFG_WEBSEARCH_SHOW_COMMENT_COUNT -- do we want to show the 'N comments' ## links on the search engine pages? (useful only when you have allowed ## commenting) CFG_WEBSEARCH_SHOW_COMMENT_COUNT = 1 ## CFG_WEBSEARCH_SHOW_REVIEW_COUNT -- do we want to show the 'N reviews' ## links on the search engine pages? (useful only when you have allowed ## reviewing) CFG_WEBSEARCH_SHOW_REVIEW_COUNT = 1 ## CFG_WEBSEARCH_FULLTEXT_SNIPPETS -- how many full-text snippets do ## we want to display for full-text searches? If you want to specify ## different values for different document status types, please add ## more items into this dictionary. (Unless specified, the empty ## value will be used as default.) This is useful if you have ## restricted files of different types with various restrictions on ## what we can show. CFG_WEBSEARCH_FULLTEXT_SNIPPETS = { '': 4, } ## CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS -- what is the maximum size ## of a snippet to display around the pattern found in the full-text? ## If you want to specify different values for different document ## status types, please add more items into this dictionary. (Unless ## specified, the empty value will be used as default.) This is ## useful if you have restricted files of different types with various ## restrictions on what we can show. 
CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS = { '': 100, } ## CFG_WEBSEARCH_WILDCARD_LIMIT -- some of the queries, wildcard ## queries in particular (ex: cern*, a*), but also regular expressions ## (ex: [a-z]+), may take a long time to respond due to the high ## number of hits. You can limit the number of terms matched by a ## wildcard by setting this variable. A negative value or zero means ## that none of the queries will be limited (which may be wanted but ## also prone to denial-of-service kind of attacks). CFG_WEBSEARCH_WILDCARD_LIMIT = 50000 ## CFG_WEBSEARCH_SYNONYM_KBRS -- defines which knowledge bases are to ## be used for which index in order to provide runtime synonym lookup ## of user-supplied terms, and what massaging function should be used ## upon search pattern before performing the KB lookup. (Can be one ## of `exact', 'leading_to_comma', `leading_to_number'.) CFG_WEBSEARCH_SYNONYM_KBRS = { 'journal': ['SEARCH-SYNONYM-JOURNAL', 'leading_to_number'], } ## CFG_SOLR_URL -- optionally, you may use Solr to serve full-text ## queries. If so, please specify the URL of your Solr instance. ## Example: http://localhost:8983/solr (default solr port) CFG_SOLR_URL = ## CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT -- specify the limit when ## the previous/next/back hit links are to be displayed on detailed record pages. ## In order to speed up list manipulations, if a search returns lots of hits, ## more than this limit, then do not lose time calculating next/previous/back ## hits at all, but display page directly without these. ## Note also that Invenio installations that do not like ## to have the next/previous hit link functionality would be able to set this ## variable to zero and not see anything. 
CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT = 1000 ## CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY -- when a record belongs to more than one ## restricted collection, if the viewrestcoll policy is set to "ALL" (default) ## then the user must be authorized to all the restricted collections, in ## order to be granted access to the specific record. If the policy is set to ## "ANY", then the user need to be authorized to only one of the collections ## in order to be granted access to the specific record. CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY = ALL ## CFG_WEBSEARCH_SPIRES_SYNTAX -- variable to configure the use of the ## SPIRES query syntax in searches. Values: 0 = SPIRES syntax is ## switched off; 1 = leading 'find' is required; 9 = leading 'find' is ## not required (leading SPIRES operator, space-operator-space, etc ## are also accepted). CFG_WEBSEARCH_SPIRES_SYNTAX = 1 ## CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS -- when user search does not ## return any direct result, what do we want to display? Set to 0 in ## order to display a generic message about search returning no hits. ## Set to 1 in order to display list of nearest terms from the indexes ## that may match user query. Note: this functionality may be slow, ## so you may want to disable it on bigger sites. CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS = 1 ## CFG_WEBSEARCH_DETAILED_META_FORMAT -- the output format to use for ## detailed meta tags containing metadata as configured in the tag ## table. Default output format should be 'hdm', included. This ## format will be included in the header of /record/ pages. For ## efficiency this format should be pre-cached with BibReformat. See ## also CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR and ## CFG_WEBSEARCH_ENABLE_OPENGRAPH. CFG_WEBSEARCH_DETAILED_META_FORMAT = hdm ## CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR -- decides if meta tags for ## Google Scholar shall be included in the detailed record page ## header, when using the standard formatting templates/elements. 
See ## also CFG_WEBSEARCH_DETAILED_META_FORMAT and ## CFG_WEBSEARCH_ENABLE_OPENGRAPH. When this variable is changed and ## output format defined in CFG_WEBSEARCH_DETAILED_META_FORMAT is ## cached, a bibreformat must be run for the cached records. CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR = True ## CFG_WEBSEARCH_ENABLE_OPENGRAPH -- decides if meta tags for the Open ## Graph protocol shall be included in the detailed record page ## header, when using the standard formatting templates/elements. See ## also CFG_WEBSEARCH_DETAILED_META_FORMAT and ## CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR. When this variable is changed ## and output format defined in CFG_WEBSEARCH_DETAILED_META_FORMAT is ## cached, a bibreformat must be run for the cached records. Note that ## enabling Open Graph produces invalid XHTML/HTML5 markup. CFG_WEBSEARCH_ENABLE_OPENGRAPH = False ####################################### ## Part 4: BibHarvest OAI parameters ## ####################################### ## This part defines parameters for the Invenio OAI gateway. ## Useful if you are running Invenio as OAI data provider. ## CFG_OAI_ID_FIELD -- OAI identifier MARC field: CFG_OAI_ID_FIELD = 909COo ## CFG_OAI_SET_FIELD -- OAI set MARC field: CFG_OAI_SET_FIELD = 909COp ## CFG_OAI_PREVIOUS_SET_FIELD -- previous OAI set MARC field: CFG_OAI_PREVIOUS_SET_FIELD = 909COq ## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy ## (no/transient/persistent): CFG_OAI_DELETED_POLICY = persistent ## CFG_OAI_ID_PREFIX -- OAI identifier prefix: CFG_OAI_ID_PREFIX = atlantis.cern.ch ## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier: CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:123 ## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb: CFG_OAI_IDENTIFY_DESCRIPTION = http://atlantis.cern.ch/ Free and unlimited use by anybody with obligation to refer to original record Full content, i.e. preprints may not be harvested by robots Submission restricted. Submitted documents are subject of approval by OAI repository admins. 
## CFG_OAI_LOAD -- OAI number of records in a response: CFG_OAI_LOAD = 500 ## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time: CFG_OAI_EXPIRE = 90000 ## CFG_OAI_SLEEP -- service unavailable between two consecutive ## requests for CFG_OAI_SLEEP seconds: CFG_OAI_SLEEP = 2 ## CFG_OAI_METADATA_FORMATS -- mapping between accepted metadataPrefixes and ## the corresponding output format to use, its schema and its metadataNamespace. CFG_OAI_METADATA_FORMATS = { 'marcxml': ('XOAIMARC', 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', 'http://www.loc.gov/MARC21/slim'), 'oai_dc': ('XOAIDC', 'http://www.openarchives.org/OAI/1.1/dc.xsd', 'http://purl.org/dc/elements/1.1/'), } ## CFG_OAI_FRIENDS -- list of OAI baseURL of friend repositories. See: ## CFG_OAI_FRIENDS = http://cdsweb.cern.ch/oai2d,http://openaire.cern.ch/oai2d,http://export.arxiv.org/oai2 ## The following subfields are a completion to ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. If CFG_OAI_PROVENANCE_BASEURL_SUBFIELD is ## set for a record, then the corresponding field is considered as being ## harvested via OAI-PMH ## CFG_OAI_PROVENANCE_BASEURL_SUBFIELD -- baseURL of the originDescription of a ## record CFG_OAI_PROVENANCE_BASEURL_SUBFIELD = u ## CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD -- datestamp of the originDescription ## of a record CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD = d ## CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD -- metadataNamespace of the ## originDescription of a record CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD = m ## CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD -- originDescription of the ## originDescription of a record CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD = d ## CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD -- harvestDate of the ## originDescription of a record CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD = h ## CFG_OAI_PROVENANCE_ALTERED_SUBFIELD -- altered flag of the ## originDescription of a record CFG_OAI_PROVENANCE_ALTERED_SUBFIELD = t ## CFG_OAI_FAILED_HARVESTING_STOP_QUEUE 
-- when harvesting OAI sources ## fails, shall we report an error with the task and stop BibSched ## queue, or simply wait for the next run of the task? A value of 0 ## will stop the task upon errors, 1 will let the queue run if the ## next run of the oaiharvest task can safely recover the failure ## (this means that the queue will stop if the task is not set to run ## periodically) CFG_OAI_FAILED_HARVESTING_STOP_QUEUE = 1 ## CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN -- when ## CFG_OAI_FAILED_HARVESTING_STOP_QUEUE is set to leave the queue ## running upon errors, shall we send an email to admin to notify ## about the failure? CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN = True ## NOTE: the following parameters are experimenta ## ----------------------------------------------------------------------------- ## CFG_OAI_RIGHTS_FIELD -- MARC field dedicated to storing Copyright information CFG_OAI_RIGHTS_FIELD = 542__ ## CFG_OAI_RIGHTS_HOLDER_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright holder information CFG_OAI_RIGHTS_HOLDER_SUBFIELD = d ## CFG_OAI_RIGHTS_DATE_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright date information CFG_OAI_RIGHTS_DATE_SUBFIELD = g ## CFG_OAI_RIGHTS_URI_SUBFIELD -- MARC subfield dedicated to storing the URI ## (URL or URN, more detailed statement about copyright status) information CFG_OAI_RIGHTS_URI_SUBFIELD = u ## CFG_OAI_RIGHTS_CONTACT_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright holder contact information CFG_OAI_RIGHTS_CONTACT_SUBFIELD = e ## CFG_OAI_RIGHTS_STATEMENT_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright statement as presented on the resource CFG_OAI_RIGHTS_STATEMENT_SUBFIELD = f ## CFG_OAI_LICENSE_FIELD -- MARC field dedicated to storing terms governing ## use and reproduction (license) CFG_OAI_LICENSE_FIELD = 540__ ## CFG_OAI_LICENSE_TERMS_SUBFIELD -- MARC subfield dedicated to storing the ## Terms governing use and reproduction, e.g. 
CC License CFG_OAI_LICENSE_TERMS_SUBFIELD = a ## CFG_OAI_LICENSE_PUBLISHER_SUBFIELD -- MARC subfield dedicated to storing the ## person or institution imposing the license (author, publisher) CFG_OAI_LICENSE_PUBLISHER_SUBFIELD = b ## CFG_OAI_LICENSE_URI_SUBFIELD -- MARC subfield dedicated to storing the URI ## URI CFG_OAI_LICENSE_URI_SUBFIELD = u ##------------------------------------------------------------------------------ -################################## -## Part 5: WebSubmit parameters ## -################################## +################################### +## Part 5: BibDocFile parameters ## +################################### -## This section contains some configuration parameters for WebSubmit -## module. Please note that WebSubmit is mostly configured on -## run-time via its WebSubmit Admin web interface. The parameters -## below are the ones that you do not probably want to modify during -## the runtime. +## This section contains some configuration parameters for BibDocFile +## module. -## CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES -- this is the list of +## CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES -- this is the list of ## doctypes (like 'Main' or 'Additional') and their description that admins ## can choose from when adding new files via the Document File Manager ## admin interface. ## - When no value is provided, admins cannot add new ## file (they can only revise/delete/add format) ## - When a single value is given, it is used as ## default doctype for all new documents ## ## Order is relevant ## Eg: ## [('main', 'Main document'), ('additional', 'Figure, schema. 
etc')] -CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES = [ +CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES = [ ('Main', 'Main document'), ('LaTeX', 'LaTeX'), ('Source', 'Source'), ('Additional', 'Additional File'), ('Audio', 'Audio file'), ('Video', 'Video file'), ('Script', 'Script'), ('Data', 'Data'), ('Figure', 'Figure'), ('Schema', 'Schema'), ('Graph', 'Graph'), ('Image', 'Image'), ('Drawing', 'Drawing'), ('Slides', 'Slides')] -## CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS -- this is the +## CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS -- this is the ## list of restrictions (like 'Restricted' or 'No Restriction') and their ## description that admins can choose from when adding or revising files. ## Restrictions can then be configured at the level of WebAccess. ## - When no value is provided, no restriction is ## applied ## - When a single value is given, it is used as ## default resctriction for all documents. ## - The first value of the list is used as default ## restriction if the user if not given the ## choice of the restriction. Order is relevant ## ## Eg: ## [('', 'No restriction'), ('restr', 'Restricted')] -CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS = [ +CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS = [ ('', 'Public'), ('restricted', 'Restricted')] -## CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC -- set here the other +## CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC -- set here the other ## default flags and attributes to tune the Document File Manager admin ## interface. -## See the docstring of websubmit_managedocfiles.create_file_upload_interface +## See the docstring of bibdocfile_managedocfiles.create_file_upload_interface ## to have a description of the available parameters and their syntax. ## In general you will rarely need to change this variable. 
-CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC = { +CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC = { 'can_revise_doctypes': ['*'], 'can_comment_doctypes': ['*'], 'can_describe_doctypes': ['*'], 'can_delete_doctypes': ['*'], 'can_keep_doctypes': ['*'], 'can_rename_doctypes': ['*'], 'can_add_format_to_doctypes': ['*'], 'can_restrict_doctypes': ['*'], } -## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext +## CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext ## documents are stored under "/opt/invenio/var/data/files/gX/Y" ## directories where X is 0,1,... and Y stands for bibdoc ID. Thusly ## documents Y are grouped into directories X and this variable ## indicates the maximum number of documents Y stored in each ## directory X. This limit is imposed solely for filesystem ## performance reasons in order not to have too many subdirectories in ## a given directory. -CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000 +CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000 -## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated +## CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated ## list of document extensions not listed in Python standard mimetype ## library that should be recognized by Invenio. -CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm +CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm -## CFG_WEBSUBMIT_DESIRED_CONVERSIONS -- a dictionary having as keys +## CFG_BIBDOCFILE_DESIRED_CONVERSIONS -- a dictionary having as keys ## a format and as values the corresponding list of desired converted ## formats. 
-CFG_WEBSUBMIT_DESIRED_CONVERSIONS = { +CFG_BIBDOCFILE_DESIRED_CONVERSIONS = { 'pdf' : ('pdf;pdfa', ), 'ps.gz' : ('pdf;pdfa', ), 'djvu' : ('pdf', ), 'sxw': ('doc', 'odt', 'pdf;pdfa', ), 'docx' : ('doc', 'odt', 'pdf;pdfa', ), 'doc' : ('odt', 'pdf;pdfa', 'docx'), 'rtf' : ('pdf;pdfa', 'odt', ), 'odt' : ('pdf;pdfa', 'doc', ), 'pptx' : ('ppt', 'odp', 'pdf;pdfa', ), 'ppt' : ('odp', 'pdf;pdfa', 'pptx'), 'sxi': ('odp', 'pdf;pdfa', ), 'odp' : ('pdf;pdfa', 'ppt', ), 'xlsx' : ('xls', 'ods', 'csv'), 'xls' : ('ods', 'csv'), 'ods' : ('xls', 'xlsx', 'csv'), 'sxc': ('xls', 'xlsx', 'csv'), 'tiff' : ('pdf;pdfa', ), 'tif' : ('pdf;pdfa', ),} ## CFG_BIBDOCFILE_USE_XSENDFILE -- if your web server supports ## XSendfile header, you may want to enable this feature in order for ## to Invenio tell the web server to stream files for download (after ## proper authorization checks) by web server's means. This helps to ## liberate Invenio worker processes from being busy with sending big ## files to clients. The web server will take care of that. Note: ## this feature is still somewhat experimental. Note: when enabled ## (set to 1), then you have to also regenerate Apache vhost conf ## snippets (inveniocfg --update-config-py --create-apache-conf). CFG_BIBDOCFILE_USE_XSENDFILE = 0 ## CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY -- a number between 0 and ## 1 that indicates probability with which MD5 checksum will be ## verified when streaming bibdocfile-managed files. (0.1 will cause ## the check to be performed once for every 10 downloads) CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY = 0.1 ## CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM -- a comma-separated ## list of document extensions in decrescent order of preference ## to suggest what is considered the best format to extract text from. 
CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM = ('txt', 'html', 'xml', 'odt', 'doc', 'docx', 'djvu', 'pdf', 'ps', 'ps.gz') ## CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE -- whether to use the ## database table bibdocfsinfo as reference for filesystem ## information. The default is 0. Switch this to 1 ## after you have run bibdocfile --fix-bibdocfsinfo-cache ## or on an empty system. CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE = 0 ## CFG_OPENOFFICE_SERVER_HOST -- the host where an OpenOffice Server is ## listening to. If localhost an OpenOffice server will be started ## automatically if it is not already running. ## Note: if you set this to an empty value this will disable the usage of ## OpenOffice for converting documents. ## If you set this to something different than localhost you'll have to take ## care to have an OpenOffice server running on the corresponding host and ## to install the same OpenOffice release both on the client and on the server ## side. ## In order to launch an OpenOffice server on a remote machine, just start ## the usual 'soffice' executable in this way: ## $> soffice -headless -nologo -nodefault -norestore -nofirststartwizard \ ## .. -accept=socket,host=HOST,port=PORT;urp;StarOffice.ComponentContext CFG_OPENOFFICE_SERVER_HOST = localhost ## CFG_OPENOFFICE_SERVER_PORT -- the port where an OpenOffice Server is ## listening to. CFG_OPENOFFICE_SERVER_PORT = 2002 ## CFG_OPENOFFICE_USER -- the user that will be used to launch the OpenOffice ## client. It is recommended to set this to a user who don't own files, like ## e.g. 'nobody'. You should also authorize your Apache server user to be ## able to become this user, e.g. by adding to your /etc/sudoers the following ## line: ## "apache ALL=(nobody) NOPASSWD: ALL" ## provided that apache is the username corresponding to the Apache user. ## On some machine this might be apache2 or www-data. 
CFG_OPENOFFICE_USER = nobody ################################# ## Part 6: BibIndex parameters ## ################################# ## This section contains some configuration parameters for BibIndex ## module. Please note that BibIndex is mostly configured on run-time ## via its BibIndex Admin web interface. The parameters below are the ## ones that you do not probably want to modify very often during the ## runtime. ## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do ## you want to index locally stored files only, or also external URLs? ## Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 1 ## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove ## stopwords? Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_REMOVE_STOPWORDS = 0 ## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as ## alphanumeric separators of word-blocks inside words. You probably ## don't want to change this. CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ ## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation ## between word-blocks inside words. You probably don't want to ## change this. CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\" ## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup ## before indexing? Use 1 if you have HTML markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0 ## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LATEX markup ## before indexing? Use 1 if you have LATEX markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0 ## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to ## index. The terms smaller then this amount will be discarded. 
## Useful to keep the database clean, however you can safely leave ## this value on 0 for up to 1,000,000 documents. CFG_BIBINDEX_MIN_WORD_LENGTH = 0 ## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD -- ## access credentials to access restricted URLs, interesting only if ## you are fulltext-indexing files located on a remote server that is ## only available via username/password. But it's probably better to ## handle this case via IP or some convention; the current scheme is ## mostly there for demo only. CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass ## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- ## Enable sanity checks for integers passed to the intbitset data ## structures. It is good to enable this during debugging ## and to disable this value for speed improvements. CFG_INTBITSET_ENABLE_SANITY_CHECKS = False ## CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES -- regular expression that matches ## docnames for which OCR is desired (set this to .* in order to enable ## OCR in general, set this to empty in order to disable it.) CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES = scan-.* ## CFG_BIBINDEX_SPLASH_PAGES -- key-value mapping where the key corresponds ## to a regular expression that matches the URLs of the splash pages of ## a given service and the value is a regular expression of the set of URLs ## referenced via tags in the HTML content of the splash pages that are ## referring to documents that need to be indexed. 
## NOTE: for backward compatibility reasons you can set this to a simple ## regular expression that will directly be used as the unique key of the ## map, with corresponding value set to ".*" (in order to match any URL) CFG_BIBINDEX_SPLASH_PAGES = { "http://documents\.cern\.ch/setlink\?.*": ".*", "http://ilcagenda\.linearcollider\.org/subContributionDisplay\.py\?.*|http://ilcagenda\.linearcollider\.org/contributionDisplay\.py\?.*": "http://ilcagenda\.linearcollider\.org/getFile\.py/access\?.*|http://ilcagenda\.linearcollider\.org/materialDisplay\.py\?.*", } ## CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES -- do we want ## the author word index to exclude first names to keep only last ## names? If set to True, then for the author `Bernard, Denis', only ## `Bernard' will be indexed in the word index, not `Denis'. Note ## that if you change this variable, you have to re-index the author ## index via `bibindex -w author -R'. CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES = False ## CFG_BIBINDEX_SYNONYM_KBRS -- defines which knowledge bases are to ## be used for which index in order to provide index-time synonym ## lookup, and what massaging function should be used upon search ## pattern before performing the KB lookup. (Can be one of `exact', ## 'leading_to_comma', `leading_to_number'.) CFG_BIBINDEX_SYNONYM_KBRS = { 'global': ['INDEX-SYNONYM-TITLE', 'exact'], 'title': ['INDEX-SYNONYM-TITLE', 'exact'], } ####################################### ## Part 7: Access control parameters ## ####################################### ## This section contains some configuration parameters for the access ## control system. Please note that WebAccess is mostly configured on ## run-time via its WebAccess Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. 
(If you do want to modify them during ## runtime, for example te deny access temporarily because of backups, ## you can edit access_control_config.py directly, no need to get back ## here and no need to redo the make process.) ## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is. ## Use 0 for normal operation of the site, 1 for read-only site (all ## write operations temporarily closed), 2 for site fully closed, ## 3 for also disabling any database connection. ## Useful for site maintenance. CFG_ACCESS_CONTROL_LEVEL_SITE = 0 ## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy. Use ## 0 to allow guest users, 1 not to allow them (all users must login). CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0 ## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and ## activation policy. When 0, users can register and accounts are ## automatically activated. When 1, users can register but admin must ## activate the accounts. When 2, users cannot register nor update ## their email address, only admin can register accounts. When 3, ## users cannot register nor update email address nor password, only ## admin can register accounts. When 4, the same as 3 applies, nor ## user cannot change his login method. When 5, then the same as 4 ## applies, plus info about how to get an account is hidden from the ## login page. CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account ## registration to certain email addresses? If wanted, give domain ## name below, e.g. "cern.ch". If not wanted, leave it empty. CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a ## notification email to the administrator when a new account is ## created? Use 0 for no, 1 for yes. 
CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a ## notification email to the user when a new account is created in order to ## to verify the validity of the provided email address? Use ## 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a ## notification email to the user when a new account is activated? ## Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a ## notification email to the user when a new account is deleted or ## account demand rejected? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0 ## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials ## are stored. Must be an absolute pathname. If the value does not ## start by a slash, it is considered to be the filename of a file ## located under prefix/var/tmp directory. This is useful for the ## demo site testing purposes. For the production site, if you plan ## to restrict access to some collections based on the Apache user ## authentication mechanism, you should put here an absolute path to ## your Apache password file. CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords ## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are ## defined. See the documentation of the preceding config variable. CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups ################################### ## Part 8: WebSession parameters ## ################################### ## This section contains some configuration parameters for tweaking ## session handling. ## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a session ## and the corresponding cookie is considered expired. 
CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2 ## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which a session ## and the corresponding cookie is considered expired, when the user has ## requested to permanently stay logged in. CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365 ## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when user requested ## a password reset, for how many days is the URL valid? CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account ## activation email was sent, for how many days is the URL valid? CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- when ## user won't confirm his email address and not complete ## registeration, after how many days will it expire? CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10 ## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the session ## system allocates the same uid=0 to all guests users regardless of where they ## come from. 0 allocate a unique uid to each guest. CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0 ## CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS -- to prevent session cookie ## stealing, Invenio checks that the IP address of a connection is the ## same as that of the connection which created the initial session. ## This variable let you decide how many bits should be skipped during ## this check. Set this to 0 in order to enable full IP address ## checking. Set this to 32 in order to disable IP address checking. ## Intermediate values (say 8) let you have some degree of security so ## that you can trust your local network only while helping to solve ## issues related to outside clients that configured their browser to ## use a web proxy for HTTP connection but not for HTTPS, thus ## potentially having two different IP addresses. 
In general, if you use ## HTTPS in order to serve authenticated content, you can safely set ## CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS to 32. CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS = 0 ################################ ## Part 9: BibRank parameters ## ################################ ## This section contains some configuration parameters for the ranking ## system. ## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading ## similarity stats? ('People who viewed this page also viewed') CFG_BIBRANK_SHOW_READING_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download ## similarity stats? ('People who downloaded this document also ## downloaded') CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show download ## history graph? (0=no | 1=classic/gnuplot | 2=flot) CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we ## want to show a graph representing the distribution of client IPs ## downloading given document? (0=no | 1=classic/gnuplot | 2=flot) CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0 ## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited ## by' links? (useful only when you have citations in the metadata) CFG_BIBRANK_SHOW_CITATION_LINKS = 1 ## CFG_BIBRANK_SHOW_CITATION_STATS -- do we want to show citation ## stats? ('Cited by M records', 'Co-cited with N records') CFG_BIBRANK_SHOW_CITATION_STATS = 1 ## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show citation ## history graph? (0=no | 1=classic/gnuplot | 2=flot) CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1 #################################### ## Part 10: WebComment parameters ## #################################### ## This section contains some configuration parameters for the ## commenting and reviewing facilities. ## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users to write ## public comments on records? 
CFG_WEBCOMMENT_ALLOW_COMMENTS = 1 ## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users write ## public reviews of records? CFG_WEBCOMMENT_ALLOW_REVIEWS = 1 ## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short ## reviews, that is just the attribution of stars without submitting ## detailed review text? CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0 ## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users ## report a comment to be abusive, how many they have to be before the ## site admin is alerted? CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5 ## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site ## admin after every comment? CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple comment submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple review submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_USE_RICH_EDITOR -- enable the WYSIWYG ## Javascript-based editor when user edits comments? 
CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False ## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = info@invenio-software.org ## CFG_WEBCOMMENT_DEFAULT_MODERATOR -- if no rules are ## specified to indicate who is the comment moderator of ## a collection, this person will be used as default CFG_WEBCOMMENT_DEFAULT_MODERATOR = info@invenio-software.org ## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS -- do we want to allow the use ## of MathJax plugin to render latex input in comments? CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS = 1 ## CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION -- allow comment author to ## delete its own comment? CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION = 1 # CFG_WEBCOMMENT_EMAIL_REPLIES_TO -- which field of the record define # email addresses that should be notified of newly submitted comments, # and for which collection. Use collection names as keys, and list of # tags as values CFG_WEBCOMMENT_EMAIL_REPLIES_TO = { 'Articles': ['506__d', '506__m'], } # CFG_WEBCOMMENT_RESTRICTION_DATAFIELD -- which field of the record # define the restriction (must be linked to WebAccess # 'viewrestrcomment') to apply to newly submitted comments, and for # which collection. Use collection names as keys, and one tag as value CFG_WEBCOMMENT_RESTRICTION_DATAFIELD = { 'Articles': '5061_a', 'Pictures': '5061_a', 'Theses': '5061_a', } # CFG_WEBCOMMENT_ROUND_DATAFIELD -- which field of the record define # the current round of comment for which collection. Use collection # name as key, and one tag as value CFG_WEBCOMMENT_ROUND_DATAFIELD = { 'Articles': '562__c', 'Pictures': '562__c', } # CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE -- max file size per attached # file, in bytes. Choose 0 if you don't want to limit the size CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE = 5242880 # CFG_WEBCOMMENT_MAX_ATTACHED_FILES -- maxium number of files that can # be attached per comment. 
Choose 0 if you don't want to limit the # number of files. File uploads can be restricted with action # "attachcommentfile". CFG_WEBCOMMENT_MAX_ATTACHED_FILES = 5 # CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH -- how many levels of # indentation discussions can be. This can be used to ensure that # discussions will not go into deep levels of nesting if users don't # understand the difference between "reply to comment" and "add # comment". When the depth is reached, any "reply to comment" is # conceptually converted to a "reply to thread" (i.e. reply to this # parent's comment). Use -1 for no limit, 0 for unthreaded (flat) # discussions. CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH = 1 ################################## ## Part 11: BibSched parameters ## ################################## ## This section contains some configuration parameters for the ## bibliographic task scheduler. ## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh ## bibsched monitor? (in seconds) CFG_BIBSCHED_REFRESHTIME = 5 ## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task ## logs? CFG_BIBSCHED_LOG_PAGER = /usr/bin/less ## CFG_BIBSCHED_EDITOR -- what editor to use to edit the marcxml ## code of the locked records CFG_BIBSCHED_EDITOR = /usr/bin/vim ## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to perform the ## gargbage collector of BibSched queue (i.e. removing/moving task to archive). CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30 ## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTask that can be safely ## removed from the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc ## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be safely ## archived out of the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oairepositoryupdater ## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of BibTasks ## that can run concurrently. 
## NOTE: concurrent tasks are still considered as an experimental ## feature. Please keep this value set to 1 on production environments. CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1 ## CFG_BIBSCHED_PROCESS_USER -- bibsched and bibtask processes must ## usually run under the same identity as the Apache web server ## process in order to share proper file read/write privileges. If ## you want to force some other bibsched/bibtask user, e.g. because ## you are using a local `invenio' user that belongs to your ## `www-data' Apache user group and so shares writing rights with your ## Apache web server process in this way, then please set its username ## identity here. Otherwise we shall check whether your ## bibsched/bibtask processes are run under the same identity as your ## Apache web server process (in which case you can leave the default ## empty value here). CFG_BIBSCHED_PROCESS_USER = ## CFG_BIBSCHED_NODE_TASKS -- specific nodes may be configured to ## run only specific tasks; if you want this, then this variable is a ## dictionary of the form {'hostname1': ['task1', 'task2']}. The ## default is that any node can run any task. CFG_BIBSCHED_NODE_TASKS = {} ## CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY -- number of tasks displayed ## CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY = 500 ################################### ## Part 12: WebBasket parameters ## ################################### ## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit for ## a maximum number of displayed baskets CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20 ## CFG_WEBBASKET_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG ## Javascript-based editor when user edits comments in WebBasket? CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False ################################## ## Part 13: WebAlert parameters ## ################################## ## This section contains some configuration parameters for the ## automatic email notification alert system. 
## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBALERT_ALERT_ENGINE_EMAIL = info@invenio-software.org ## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records ## at most do we send in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20 ## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of ## chars per line in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72 ## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert ## emails fails, how many times we retry? CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3 ## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending ## alert emails fails, what is the sleeptime between tries? (in ## seconds) CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300 #################################### ## Part 14: WebMessage parameters ## #################################### ## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large web messages do we ## allow? CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000 ## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages for a ## regular user do we allow in its inbox? CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30 ## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- how many days before ## we delete orphaned messages? CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60 ################################## ## Part 15: MiscUtil parameters ## ################################## ## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool ## in the DB engine of Invenio. It is okay to enable this flag ## even if you have not installed SQLAlchemy. Note that Invenio will ## lose some performance if this option is enabled. CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False ## CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT -- how many queries can we run ## inside run_sql_many() in one SQL statement? The limit value ## depends on MySQL's max_allowed_packet configuration. 
CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT = 10000 ## CFG_MISCUTIL_SMTP_HOST -- which server to use as outgoing mail server to ## send outgoing emails generated by the system, for example concerning ## submissions or email notification alerts. CFG_MISCUTIL_SMTP_HOST = localhost ## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server ## defined in the previous step. CFG_MISCUTIL_SMTP_PORT = 25 ## CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT -- the default number of seconds after ## which a process launched through shellutils.run_process_with_timeout will ## be killed. This is useful to catch runaway processes. CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT = 300 ## CFG_MATHJAX_HOSTING -- if you plan to use MathJax to display TeX ## formulas on HTML web pages, you can specify whether you wish to use ## 'local' hosting or 'cdn' hosting of MathJax libraries. (If set to ## 'local', you have to run 'make install-mathjax-plugin' as described ## in the INSTALL guide.) If set to 'local', users will use your site ## to download MathJax sources. If set to 'cdn', users will use ## centralized MathJax CDN servers instead. Please note that using ## CDN is suitable only for small institutions or for MathJax ## sponsors; see the MathJax website for more details. (Also, please ## note that if you plan to use MathJax on your site, you have to ## adapt CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and ## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS configuration variables ## elsewhere in this file.) CFG_MATHJAX_HOSTING = local ################################# ## Part 16: BibEdit parameters ## ################################# ## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is ## locked to prevent other users from editing it at the same time. ## How many seconds of inactivity before the locked record again will be free ## for other people to edit? 
CFG_BIBEDIT_TIMEOUT = 3600 ## CFG_BIBEDIT_LOCKLEVEL -- when a user tries to edit a record which there ## is a pending bibupload task for in the queue, this shouldn't be permitted. ## The lock level determines how thouroughly the queue should be investigated ## to determine if this is the case. ## Level 0 - always permits editing, doesn't look at the queue ## (unsafe, use only if you know what you are doing) ## Level 1 - permits editing if there are no queued bibedit tasks for this record ## (safe with respect to bibedit, but not for other bibupload maintenance jobs) ## Level 2 - permits editing if there are no queued bibupload tasks of any sort ## (safe, but may lock more than necessary if many cataloguers around) ## Level 3 - permits editing if no queued bibupload task concerns given record ## (safe, most precise locking, but slow, ## checks for 001/EXTERNAL_SYSNO_TAG/EXTERNAL_OAIID_TAG) ## The recommended level is 3 (default) or 2 (if you use maintenance jobs often). CFG_BIBEDIT_LOCKLEVEL = 3 ## CFG_BIBEDIT_PROTECTED_FIELDS -- a comma-separated list of fields that BibEdit ## will not allow to be added, edited or deleted. Wildcards are not supported, ## but conceptually a wildcard is added at the end of every field specification. ## Examples: ## 500A - protect all MARC fields with tag 500 and first indicator A ## 5 - protect all MARC fields in the 500-series. ## 909C_a - protect subfield a in tag 909 with first indicator C and empty ## second indicator ## Note that 001 is protected by default, but if protection of other ## identifiers or automated fields is a requirement, they should be added to ## this list. CFG_BIBEDIT_PROTECTED_FIELDS = ## CFG_BIBEDIT_QUEUE_CHECK_METHOD -- how do we want to check for ## possible queue locking situations to prevent cataloguers from ## editing a record that may be waiting in the queue? 
Use 'bibrecord' ## for exact checking (always works, but may be slow), use 'regexp' ## for regular expression based checking (very fast, but may be ## inaccurate). When unsure, use 'bibrecord'. CFG_BIBEDIT_QUEUE_CHECK_METHOD = bibrecord ## CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE -- a dictionary ## containing which collections will be extended with a given template ## while being displayed in BibEdit UI. CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE = { 'Poetry' : 'poem'} ## CFG_BIBEDIT_KB_SUBJECTS - Name of the KB used in the field 65017a ## to automatically convert codes into extended version. e.g ## a - Astrophysics CFG_BIBEDIT_KB_SUBJECTS = Subjects ## CFG_BIBEDIT_KB_INSTITUTIONS - Name of the KB used for institution ## autocomplete. To be applied in fields defined in ## CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS CFG_BIBEDIT_KB_INSTITUTIONS = InstitutionsCollection ## CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS - list of fields to ## be autocompleted with the KB CFG_BIBEDIT_KB_INSTITUTIONS CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS = 100__u,700__u,701__u,502__c ## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING -- maximum number of records ## that can be modified instantly using the multi-record editor. Above ## this limit, modifications will only be executed in limited hours. CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING = 2000 ## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING -- maximum number of records ## that can be send for modification without having a superadmin role. ## If the number of records is between CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING ## and this number, the modifications will take place only in limited hours. CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING = 20000 ## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME -- Allowed time to ## execute modifications on records, when the number exceeds ## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING. 
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME = 22:00-05:00 ################################### ## Part 17: BibUpload parameters ## ################################### ## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references? CFG_BIBUPLOAD_REFERENCE_TAG = 999 ## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external ## system numbers? Useful for matching when our records come from an ## external digital library system. CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store OAI ID tags ## of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI ID stored in this tag (kind of like ## external system number too). CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG -- where do we store OAI SRC ## tags of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI SRC stored in this tag (kind of like ## external system number too). Note that the field should be the same of ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = 035__9 ## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that ## are strong enough to resist the replace mode. Useful for tags that ## might be created from an external non-metadata-like source, ## e.g. the information about the number of copies left. CFG_BIBUPLOAD_STRONG_TAGS = 964 ## CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -- a comma-separated list ## of tags that contain provenance information that should be checked ## in the bibupload correct mode via matching provenance codes. (Only ## field instances of the same provenance information would be acted ## upon.) Please specify the whole tag info up to subfield codes. 
CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = 6531_9 ## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of system ## paths from which it is allowed to take fulltextes that will be uploaded via ## FFT (CFG_TMPDIR is included by default). CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home ## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS -- a dictionary containing ## external URLs that can be accessed by Invenio and specific HTTP ## headers that will be used for each URL. The keys of the dictionary ## are regular expressions matching a set of URLs, the values are ## dictionaries of headers as consumed by urllib2.Request. If a ## regular expression matching all URLs is created at the end of the ## list, it means that Invenio will download all URLs. Otherwise ## Invenio will just download authorized URLs. Note: by default, a ## User-Agent built after the current Invenio version, site name, and ## site URL will be used. The values of the header dictionary can ## also contain a call to a python function, in the form of a ## disctionary with two entries: the name of the function to be called ## as a value for the 'fnc' key, and the arguments to this function, ## as a value for the 'args' key (in the form of a dictionary). ## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [ ## ('http://myurl.com/.*', {'User-Agent': 'Me'}), ## ('http://yoururl.com/.*', {'User-Agent': 'You', 'Accept': 'text/plain'}), ## ('http://thisurl.com/.*', {'Cookie': {'fnc':'read_cookie', 'args':{'cookiefile':'/tmp/cookies.txt'}}}) ## ('http://.*', {'User-Agent': 'Invenio'}), ## ] CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [ ('http(s)?://.*', {}), ] ## CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE -- do we want to serialize ## internal representation of records (Pythonic record structure) into ## the database? This can improve internal processing speed of some ## operations at the price of somewhat bigger disk space usage. 
## If you change this value after some records have already been added
## to your installation, you may want to run:
## $ /opt/invenio/bin/inveniocfg --reset-recstruct-cache
## in order to either erase the cache thus freeing database space,
## or to fill the cache for all records that have not been cached yet.
CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1

## CFG_BIBUPLOAD_DELETE_FORMATS -- which formats do we want bibupload
## to delete when a record is ingested? Enter comma-separated list of
## formats. For example, 'hb,hd' will delete pre-formatted HTML brief
## and detailed formats from cache, so that search engine will
## generate them on-the-fly. Useful to always present latest data of
## records upon record display, until the periodical bibreformat job
## runs next and updates the cache.
CFG_BIBUPLOAD_DELETE_FORMATS = hb

## CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY -- a comma-separated list
## indicating which fields match the file names of the documents to be
## uploaded.
## The matching will be done in the same order as the list provided.
CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY = reportnumber,recid

## CFG_BATCHUPLOADER_DAEMON_DIR -- Directory where the batchuploader daemon
## will look for the subfolders metadata and document by default.
## If path is relative, CFG_PREFIX will be joined as a prefix
CFG_BATCHUPLOADER_DAEMON_DIR = var/batchupload

## CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS -- Regular expression to specify the
## agents permitted when calling batch uploader web interface
## cdsweb.cern.ch/batchuploader/robotupload
## if using a curl, eg: curl xxx -A invenio
CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS = invenio_webupload|Invenio-.*

## CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS -- Access list specifying for each
## IP address, which collections are allowed using batch uploader robot
## interface.
CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = { '127.0.0.1': ['*'], # useful for testing '127.0.1.1': ['*'], # useful for testing '10.0.0.1': ['BOOK', 'REPORT'], # Example 1 '10.0.0.2': ['POETRY', 'PREPRINT'], # Example 2 } #################################### ## Part 18: BibCatalog parameters ## #################################### ## CFG_BIBCATALOG_SYSTEM -- set desired catalog system. For example, RT. CFG_BIBCATALOG_SYSTEM = ## RT CONFIGURATION ## CFG_BIBCATALOG_SYSTEM_RT_CLI -- path to the RT CLI client CFG_BIBCATALOG_SYSTEM_RT_CLI = /usr/bin/rt ## CFG_BIBCATALOG_SYSTEM_RT_URL -- Base URL of the remote RT system CFG_BIBCATALOG_SYSTEM_RT_URL = http://localhost/rt3 ## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER -- Set the username for a default RT account ## on remote system, with limited privileges, in order to only create and modify own tickets. CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER = ## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD -- Set the password for the default RT account ## on remote system. CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD = #################################### ## Part 19: BibFormat parameters ## #################################### ## CFG_BIBFORMAT_HIDDEN_TAGS -- comma-separated list of MARC tags that ## are not shown to users not having cataloging authorizations. CFG_BIBFORMAT_HIDDEN_TAGS = 595 ## CFG_BIBFORMAT_HIDDEN_FILE_FORMATS -- comma-separated list of file formats ## that are not shown explicitly to user not having cataloging authorizations. ## e.g. pdf;pdfa,xml CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = ## CFG_BIBFORMAT_ADDTHIS_ID -- if you want to use the AddThis service from ## , set this value to the pubid parameter as ## provided by the service (e.g. ra-4ff80aae118f4dad), and add a call to ## formatting element in your formats, for example ## Default_HTML_detailed.bft. CFG_BIBFORMAT_ADDTHIS_ID = ## CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS -- For each output ## format BibReformat currently creates a cache for only one language ## (CFG_SITE_LANG) per record. 
This means that visitors having set a ## different language than CFG_SITE_LANG will be served an on-the-fly ## output using the language of their choice. You can disable this ## behaviour by specifying below for which output format you would ## like to force the cache to be used whatever language is ## requested. If your format templates do not provide ## internationalization, you can optimize your site by setting for ## eg. hb,hd to always serve the precached output (if it exists) in ## the CFG_SITE_LANG CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS = #################################### ## Part 20: BibMatch parameters ## #################################### ## CFG_BIBMATCH_LOCAL_SLEEPTIME -- Determines the amount of seconds to sleep ## between search queries on LOCAL system. CFG_BIBMATCH_LOCAL_SLEEPTIME = 0.0 ## CFG_BIBMATCH_REMOTE_SLEEPTIME -- Determines the amount of seconds to sleep ## between search queries on REMOTE systems. CFG_BIBMATCH_REMOTE_SLEEPTIME = 2.0 ## CFG_BIBMATCH_FUZZY_WORDLIMITS -- Determines the amount of words to extract ## from a certain fields value during fuzzy matching mode. Add/change field ## and appropriate number to the dictionary to configure this. CFG_BIBMATCH_FUZZY_WORDLIMITS = { '100__a': 2, '245__a': 4 } ## CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT -- Determines the amount of empty results ## to accept during fuzzy matching mode. CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT = 1 ## CFG_BIBMATCH_QUERY_TEMPLATES -- Here you can set the various predefined querystrings ## used to standardize common matching queries. By default the following templates ## are given: ## title - standard title search. Taken from 245__a (default) ## title-author - title and author search (i.e. this is a title AND author a) ## Taken from 245__a and 100__a ## reportnumber - reportnumber search (i.e. reportnumber:REP-NO-123). 
CFG_BIBMATCH_QUERY_TEMPLATES = { 'title' : '[title]', 'title-author' : '[title] [author]', 'reportnumber' : 'reportnumber:[reportnumber]' } ## CFG_BIBMATCH_MATCH_VALIDATION_RULESETS -- Here you can define the various rulesets for ## validating search results done by BibMatch. Each ruleset contains a certain pattern mapped ## to a tuple defining a "matching-strategy". ## ## The rule-definitions must come in two parts: ## ## * The first part is a string containing a regular expression ## that is matched against the textmarc representation of each record. ## If a match is found, the final rule-set is updated with ## the given "sub rule-set", where identical tag rules are replaced. ## ## * The second item is a list of key->value mappings (dict) that indicates specific ## strategy parameters with corresponding validation rules. ## ## This strategy consists of five items: ## ## * MARC TAGS: ## These MARC tags represents the fields taken from original record and any records from search ## results. When several MARC tags are specified with a given match-strategy, all the fields ## associated with these tags are matched together (i.e. with key "100__a,700__a", all 100__a ## and 700__a fields are matched together. Which is useful when first-author can vary for ## certain records on different systems). ## ## * COMPARISON THRESHOLD: ## a value between 0.0 and 1.0 specifying the threshold for string matches ## to determine if it is a match or not (using normalized string-distance). ## Normally 0.8 (80% match) is considered to be a close match. ## ## * COMPARISON MODE: ## the parse mode decides how the record datafields are compared: ## - 'strict' : all (sub-)fields are compared, and all must match. Order is significant. ## - 'normal' : all (sub-)fields are compared, and all must match. Order is ignored. ## - 'lazy' : all (sub-)fields are compared with each other and at least one must match ## - 'ignored': the tag is ignored in the match. 
Used to disable previously defined rules. ## ## * MATCHING MODE: ## the comparison mode decides how the fieldvalues are matched: ## - 'title' : uses a method specialized for comparing titles, e.g. looking for subtitles ## - 'author' : uses a special authorname comparison. Will take initials into account. ## - 'identifier' : special matching for identifiers, stripping away punctuation ## - 'date': matches dates by extracting and comparing the year ## - 'normal': normal string comparison. ## Note: Fields are considered matching when all its subfields or values match. ## ## * RESULT MODE: ## the result mode decides how the results from the comparisons are handled further: ## - 'normal' : a failed match will cause the validation to immediately exit as a failure. ## a successful match will cause the validation to continue on other rules (if any) ## - 'final' : a failed match will cause the validation to immediately exit as a failure. ## a successful match will cause validation to immediately exit as a success. ## - 'joker' : a failed match will cause the validation to continue on other rules (if any). ## a successful match will cause validation to immediately exit as a success. ## ## You can add your own rulesets in the dictionary below. The 'default' ruleset is always applied, ## and should therefore NOT be removed, but can be changed. The tag-rules can also be overwritten ## by other rulesets. ## ## WARNING: Beware that the validation quality is only as good as given rules, so matching results ## are never guaranteed to be accurate, as it is very content-specific. 
CFG_BIBMATCH_MATCH_VALIDATION_RULESETS = [('default', [{ 'tags' : '245__%,242__%', 'threshold' : 0.8, 'compare_mode' : 'lazy', 'match_mode' : 'title', 'result_mode' : 'normal' }, { 'tags' : '037__a,088__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'final' }, { 'tags' : '100__a,700__a', 'threshold' : 0.8, 'compare_mode' : 'normal', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '773__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'title', 'result_mode' : 'normal' }]), ('980__ \$\$a(THESIS|Thesis)', [{ 'tags' : '100__a', 'threshold' : 0.8, 'compare_mode' : 'strict', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '700__a,701__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '100__a,700__a', 'threshold' : 0.8, 'compare_mode' : 'ignored', 'match_mode' : 'author', 'result_mode' : 'normal' }]), ('260__', [{ 'tags' : '260__c', 'threshold' : 0.8, 'compare_mode' : 'lazy', 'match_mode' : 'date', 'result_mode' : 'normal' }]), ('0247_', [{ 'tags' : '0247_a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'final' }]), ('020__', [{ 'tags' : '020__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'joker' }]) ] ## CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT -- Determines the minimum percentage of the ## amount of rules to be positively matched when comparing two records. Should the number ## of matches be lower than required matches but equal to or above this limit, ## the match will be considered fuzzy. CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT = 0.65 ## CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT -- Determines the maximum amount of search results ## a single search can return before acting as a non-match. 
CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT = 15 ###################################### ## Part 21: BibAuthorID parameters ## ###################################### # CFG_BIBAUTHORID_MAX_PROCESSES is the max number of processes # that may be spawned by the disambiguation algorithm CFG_BIBAUTHORID_MAX_PROCESSES = 12 # CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS is the max number of threads # to parallelize sql queries during personID tables updates CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS = 12 # CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY defines the user info # keys for externally claimed records in an remote-login scenario--e.g. from arXiv.org # e.g. "external_arxivids" for arXiv SSO CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY = # CFG_BIBAUTHORID_AID_ENABLED # Globally enable AuthorID Interfaces. # If False: No guest, user or operator will have access to the system. CFG_BIBAUTHORID_ENABLED = True # CFG_BIBAUTHORID_AID_ON_AUTHORPAGES # Enable AuthorID information on the author pages. CFG_BIBAUTHORID_ON_AUTHORPAGES = True # CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL defines the eMail address # all ticket requests concerning authors will be sent to. CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL = info@invenio-software.org #CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE defines if the optional arXive stub page is skipped CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = False ###################################### ## Part 22: BibClassify parameters ## ###################################### # CFG_BIBCLASSIFY_WEB_MAXKW -- maximum number of keywords to display # in the Keywords tab web page. CFG_BIBCLASSIFY_WEB_MAXKW = 100 ######################################## ## Part 23: Plotextractor parameters ## ######################################## ## CFG_PLOTEXTRACTOR_SOURCE_BASE_URL -- for acquiring source tarballs for plot ## extraction, where should we look? 
If nothing is set, we'll just go ## to arXiv, but this can be a filesystem location, too CFG_PLOTEXTRACTOR_SOURCE_BASE_URL = http://arxiv.org/ ## CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER -- for acquiring source tarballs for plot ## extraction, subfolder where the tarballs sit CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER = e-print/ ## CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER -- for acquiring source tarballs for plot ## extraction, subfolder where the pdf sit CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER = pdf/ ## CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT -- a float representing the number of seconds ## to wait between each download of pdf and/or tarball from source URL. CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT = 2.0 ## CFG_PLOTEXTRACTOR_CONTEXT_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of characters in each direction to extract ## context from. Default 750. CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT = 750 ## CFG_PLOTEXTRACTOR_DISALLOWED_TEX -- when extracting context of plots from TeX ## sources, this is the list of TeX tags that will trigger 'end of context'. CFG_PLOTEXTRACTOR_DISALLOWED_TEX = begin,end,section,includegraphics,caption,acknowledgements ## CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of words in each direction. Default 75. CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT = 75 ## CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of sentences in each direction. Default 2. CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT = 2 ###################################### ## Part 24: WebStat parameters ## ###################################### # CFG_WEBSTAT_BIBCIRCULATION_START_YEAR defines the start date of the BibCirculation # statistics. Value should have the format 'yyyy'. If empty, take all existing data. 
CFG_WEBSTAT_BIBCIRCULATION_START_YEAR = ###################################### ## Part 25: Web API Key parameters ## ###################################### # CFG_WEB_API_KEY_ALLOWED_URL defines the web apps that are going to use the web # API key. It has three values, the name of the web app, the time of life for the # secure url and if a time stamp is needed. #CFG_WEB_API_KEY_ALLOWED_URL = [('search/\?', 3600, True), # ('rss', 0, False)] CFG_WEB_API_KEY_ALLOWED_URL = [] ########################################## ## Part 25: WebAuthorProfile parameters ## ########################################## #CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_LIVE consider a cached element expired after days #when loading an authorpage, thus recomputing the content live CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_LIVE = 7 #CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_BIBSCHED consider a cache element expired after days, #thus recompute it, bibsched daemon CFG_WEBAUTHORPROFILE_CACHE_EXPIRED_DELAY_BIBSCHED = 5 #CFG_WEBAUTHORPROFILE_MAX_COLLAB_LIST: limit collaboration list. #Set to 0 to disable limit. CFG_WEBAUTHORPROFILE_MAX_COLLAB_LIST = 100 #CFG_WEBAUTHORPROFILE_MAX_KEYWORD_LIST: limit keywords list #Set to 0 to disable limit. CFG_WEBAUTHORPROFILE_MAX_KEYWORD_LIST = 100 #CFG_WEBAUTHORPROFILE_MAX_AFF_LIST: limit affiliations list #Set to 0 to disable limit. CFG_WEBAUTHORPROFILE_MAX_AFF_LIST = 100 #CFG_WEBAUTHORPROFILE_MAX_COAUTHOR_LIST: limit coauthors list #Set to 0 to disable limit. CFG_WEBAUTHORPROFILE_MAX_COAUTHOR_LIST = 100 #CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES: limit HepRecords choices #Set to 0 to disable limit. CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES = 10 #CFG_WEBAUTHORPROFILE_USE_BIBAUTHORID: use bibauthorid or exactauthor CFG_WEBAUTHORPROFILE_USE_BIBAUTHORID = False #################################### ## Part 25: BibSort parameters ## #################################### ## CFG_BIBSORT_BUCKETS -- the number of buckets bibsort should use. 
## If 0, then no buckets will be used (bibsort will be inactive).
## If different from 0, bibsort will be used for sorting the records.
## The number of buckets should be set with regards to the size
## of the repository; having a larger number of buckets will increase
## the sorting performance for the top results but will decrease
## the performance for sorting the middle results.
## We recommend to use 1 in case you have less than about
## 1,000,000 records.
## When modifying this variable, re-run rebalancing for all the bibsort
## methods, for having the database in synch.
CFG_BIBSORT_BUCKETS = 1

##########################
## THAT's ALL, FOLKS! ##
##########################
diff --git a/configure.ac b/configure.ac
index 4a0dbeabf..8237cdbf6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,919 +1,924 @@
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

## This is Invenio main configure.ac file. If you change this
## file, then please run "autoreconf" to regenerate the "configure"
## script.

## Initialize autoconf and automake:
AC_INIT([invenio], m4_esyscmd([./git-version-gen .tarball-version]), [info@invenio-software.org])
AM_INIT_AUTOMAKE([tar-ustar])

## By default we shall install into /opt/invenio.
(Do not use ## AC_PREFIX_DEFAULT for this, because it would not work well with ## the localstatedir hack below.) test "${prefix}" = NONE && prefix=/opt/invenio ## Remove eventual trailing slashes from the prefix value: test "${prefix%/}" != "" && prefix=${prefix%/} ## Check for install: AC_PROG_INSTALL ## Check for gettext support: AM_GNU_GETTEXT(external) AM_GNU_GETTEXT_VERSION(0.14.4) ## Check for MySQL client: AC_MSG_CHECKING(for mysql) AC_ARG_WITH(mysql, AC_HELP_STRING([--with-mysql], [path to a specific MySQL binary (optional)]), MYSQL=${withval}) if test -n "$MYSQL"; then AC_MSG_RESULT($MYSQL) else AC_PATH_PROG(MYSQL, mysql) if test -z "$MYSQL"; then AC_MSG_ERROR([ MySQL command-line client was not found in your PATH. Please install it first. Available from .]) fi fi ## Check for Python: AC_MSG_CHECKING(for python) AC_ARG_WITH(python, AC_HELP_STRING([--with-python], [path to a specific Python binary (optional)]), PYTHON=${withval}) if test -n "$PYTHON"; then AC_MSG_RESULT($PYTHON) else AC_PATH_PROG(PYTHON, python) if test -z "$PYTHON"; then AC_MSG_ERROR([ Python was not found in your PATH. Please either install it in your PATH or specify --with-python configure option. Python is available from .]) fi fi ## Check for OpenOffice.org Python binary: AC_MSG_CHECKING(for OpenOffice.org Python binary) AC_ARG_WITH(openoffice-python, AC_HELP_STRING([--with-openoffice-python], [path to a specific OpenOffice.org Python binary (optional)]), OPENOFFICE_PYTHON=`which ${withval}`) if test -z "$OPENOFFICE_PYTHON"; then OPENOFFICE_PYTHON=`locate -l 1 -r "o.*office/program/python$"` OPENOFFICE_PYTHON="$PYTHON $OPENOFFICE_PYTHON" if test -n "$OPENOFFICE_PYTHON" && ($OPENOFFICE_PYTHON -c "import uno" 2> /dev/null); then AC_MSG_RESULT($OPENOFFICE_PYTHON) else AC_MSG_WARN([ You have not specified the path ot the OpenOffice.org Python binary. OpenOffice.org and Microsoft Office document conversion and fulltext indexing will not be available. 
We recommend you to install OpenOffice.org first and to rerun the configure script. OpenOffice.org is available from .]) fi elif ($OPENOFFICE_PYTHON -c "import uno" 2> /dev/null); then AC_MSG_RESULT($OPENOFFICE_PYTHON) else AC_MSG_ERROR([ The specified OpenOffice.org Python binary is not correctly configured. Please specify the correct path to the specific OpenOffice Python binary (OpenOffice.org is available from ).]) fi ## Check for Python version and modules: AC_MSG_CHECKING(for required Python modules) $PYTHON ${srcdir}/configure-tests.py if test $? -ne 0; then AC_MSG_ERROR([Please fix the above Python problem before continuing.]) fi AC_MSG_RESULT(found) ## Check for PHP: AC_PATH_PROG(PHP, php) ## Check for gzip: AC_PATH_PROG(GZIP, gzip) if test -z "$GZIP"; then AC_MSG_WARN([ Gzip was not found in your PATH. It is used in the WebSubmit module to compress the data submitted in an archive. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Gzip is available from .]) fi ## Check for gunzip: AC_PATH_PROG(GUNZIP, gunzip) if test -z "$GUNZIP"; then AC_MSG_WARN([ Gunzip was not found in your PATH. It is used in the WebSubmit module to correctly deal with submitted compressed files. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Gunzip is available from .]) fi ## Check for tar: AC_PATH_PROG(TAR, tar) if test -z "$TAR"; then AC_MSG_WARN([ Tar was not found in your PATH. It is used in the WebSubmit module to pack the submitted data into an archive. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Tar is available from .]) fi ## Check for wget: AC_PATH_PROG(WGET, wget) if test -z "$WGET"; then AC_MSG_WARN([ wget was not found in your PATH. It is used for the fulltext file retrieval. 
You can continue without it but we recomend you to install it first and to rerun the configure script. wget is available from .]) fi ## Check for md5sum: AC_PATH_PROG(MD5SUM, md5sum) if test -z "$MD5SUM"; then AC_MSG_WARN([ md5sum was not found in your PATH. It is used for the fulltext file checksum verification. You can continue without it but we recomend you to install it first and to rerun the configure script. md5sum is available from .]) fi ## Check for ps2pdf: AC_PATH_PROG(PS2PDF, ps2pdf) if test -z "$PS2PDF"; then AC_MSG_WARN([ ps2pdf was not found in your PATH. It is used in the WebSubmit module to convert submitted PostScripts into PDF. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. ps2pdf is available from .]) fi ## Check for pdflatex: AC_PATH_PROG(PDFLATEX, pdflatex) if test -z "$PDFLATEX"; then AC_MSG_WARN([ pdflatex was not found in your PATH. It is used in the WebSubmit module to stamp PDF files. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script.]) fi ## Check for tiff2pdf: AC_PATH_PROG(TIFF2PDF, tiff2pdf) if test -z "$TIFF2PDF"; then AC_MSG_WARN([ tiff2pdf was not found in your PATH. It is used in the WebSubmit module to convert submitted TIFF file into PDF. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. tiff2pdf is available from .]) fi ## Check for gs: AC_PATH_PROG(GS, gs) if test -z "$GS"; then AC_MSG_WARN([ gs was not found in your PATH. It is used in the WebSubmit module to convert submitted PostScripts into PDF. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. 
gs is available from .]) fi ## Check for pdftotext: AC_PATH_PROG(PDFTOTEXT, pdftotext) if test -z "$PDFTOTEXT"; then AC_MSG_WARN([ pdftotext was not found in your PATH. It is used for the fulltext indexation of PDF files. You can continue without it but you may miss fulltext searching capability of Invenio. We recomend you to install it first and to rerun the configure script. pdftotext is available from . ]) fi ## Check for pdftotext: AC_PATH_PROG(PDFINFO, pdfinfo) if test -z "$PDFINFO"; then AC_MSG_WARN([ pdfinfo was not found in your PATH. It is used for gathering information on PDF files. You can continue without it but you may miss this feature of Invenio. We recomend you to install it first and to rerun the configure script. pdftotext is available from . ]) fi ## Check for pdftk: AC_PATH_PROG(PDFTK, pdftk) if test -z "$PDFTK"; then AC_MSG_WARN([ pdftk was not found in your PATH. It is used for the fulltext file stamping. You can continue without it but you may miss this feature of Invenio. We recomend you to install it first and to rerun the configure script. pdftk is available from . ]) fi ## Check for pdf2ps: AC_PATH_PROG(PDF2PS, pdf2ps) if test -z "$PDF2PS"; then AC_MSG_WARN([ pdf2ps was not found in your PATH. It is used in the WebSubmit module to convert submitted PDFs into PostScript. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdf2ps is available from .]) fi ## Check for pdftops: AC_PATH_PROG(PDFTOPS, pdftops) if test -z "$PDFTOPS"; then AC_MSG_WARN([ pdftops was not found in your PATH. It is used in the WebSubmit module to convert submitted PDFs into PostScript. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. 
pdftops is available from .])
fi
## Check for pdfopt:
AC_PATH_PROG(PDFOPT, pdfopt)
if test -z "$PDFOPT"; then
AC_MSG_WARN([ pdfopt was not found in your PATH. It is used in the WebSubmit module to linearized submitted PDFs. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdfopt is available from .])
fi
## Check for pdftoppm (image extraction from PDFs for OCR):
AC_PATH_PROG(PDFTOPPM, pdftoppm)
if test -z "$PDFTOPPM"; then
AC_MSG_WARN([ pdftoppm was not found in your PATH. It is used in the WebSubmit module to extract images from PDFs for OCR. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pdftoppm is available from .])
fi
## Check for pamfile (netpbm): the probed program must be pamfile itself,
## not pdftoppm (copy-paste slip from the check above), otherwise PAMFILE
## ends up pointing at the wrong tool and the warning below never fires
## when pamfile is actually missing:
AC_PATH_PROG(PAMFILE, pamfile)
if test -z "$PAMFILE"; then
AC_MSG_WARN([ pamfile was not found in your PATH. It is used in the WebSubmit module to retrieve the size of images extracted from PDFs for OCR. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. pamfile is available as part of the netpbm utilities from: .])
fi
## Check for ocroscript:
AC_PATH_PROG(OCROSCRIPT, ocroscript)
if test -z "$OCROSCRIPT"; then
AC_MSG_WARN([ If you plan to run OCR on your PDFs, then please install ocroscript now. Otherwise you can safely continue. You have also an option to install ocroscript later and edit invenio-local.conf to let Invenio know the path to ocroscript. ocroscript is available as part of OCROpus from . NOTE: Since OCROpus is being actively developed and its api is continuosly changing, please install relase 0.3.1])
fi
## Check for pstotext:
AC_PATH_PROG(PSTOTEXT, pstotext)
if test -z "$PSTOTEXT"; then
AC_MSG_WARN([ pstotext was not found in your PATH. It is used for the fulltext indexation of PDF and PostScript files. Please install pstotext.
Otherwise you can safely continue. You have also an option to install pstotext later and edit invenio-local.conf to let Invenio know the path to pstotext. pstotext is available from . ]) fi ## Check for ps2ascii: AC_PATH_PROG(PSTOASCII, ps2ascii) if test -z "$PSTOASCII"; then AC_MSG_WARN([ ps2ascii was not found in your PATH. It is used for the fulltext indexation of PostScript files. Please install ps2ascii. Otherwise you can safely continue. You have also an option to install ps2ascii later and edit invenio-local.conf to let Invenio know the path to ps2ascii. ps2ascii is available from . ]) fi ## Check for any2djvu: AC_PATH_PROG(ANY2DJVU, any2djvu) if test -z "$ANY2DJVU"; then AC_MSG_WARN([ any2djvu was not found in your PATH. It is used in the WebSubmit module to convert documents to DJVU. Please install any2djvu. Otherwise you can safely continue. You have also an option to install any2djvu later and edit invenio-local.conf to let Invenio know the path to any2djvu. any2djvu is available from .]) fi ## Check for DJVUPS: AC_PATH_PROG(DJVUPS, djvups) if test -z "$DJVUPS"; then AC_MSG_WARN([ djvups was not found in your PATH. It is used in the WebSubmit module to convert documents from DJVU. Please install djvups. Otherwise you can safely continue. You have also an option to install djvups later and edit invenio-local.conf to let Invenio know the path to djvups. djvups is available from .]) fi ## Check for DJVUTXT: AC_PATH_PROG(DJVUTXT, djvutxt) if test -z "$DJVUTXT"; then AC_MSG_WARN([ djvutxt was not found in your PATH. It is used in the WebSubmit module to extract text from DJVU documents. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. djvutxt is available from .]) fi ## Check for file: AC_PATH_PROG(FILE, file) if test -z "$FILE"; then AC_MSG_WARN([ File was not found in your PATH. It is used in the WebSubmit module to check the validity of the submitted files. 
You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. File is available from .]) fi ## Check for convert: AC_PATH_PROG(CONVERT, convert) if test -z "$CONVERT"; then AC_MSG_WARN([ Convert was not found in your PATH. It is used in the WebSubmit module to create an icon from a submitted picture. You can continue without it but you will miss some Invenio functionality. We recommend you to install it first and to rerun the configure script. Convert is available from .]) fi ## Check for CLISP: AC_MSG_CHECKING(for clisp) AC_ARG_WITH(clisp, AC_HELP_STRING([--with-clisp], [path to a specific CLISP binary (optional)]), CLISP=${withval}) if test -n "$CLISP"; then AC_MSG_RESULT($CLISP) else AC_PATH_PROG(CLISP, clisp) if test -z "$CLISP"; then AC_MSG_WARN([ GNU CLISP was not found in your PATH. It is used by the WebStat module to produce statistics about Invenio usage. (Alternatively, SBCL or CMUCL can be used instead of CLISP.) You can continue without it but you will miss this feature. We recommend you to install it first (if you don't have neither CMUCL nor SBCL) and to rerun the configure script. GNU CLISP is available from .]) fi fi ## Check for CMUCL: AC_MSG_CHECKING(for cmucl) AC_ARG_WITH(cmucl, AC_HELP_STRING([--with-cmucl], [path to a specific CMUCL binary (optional)]), CMUCL=${withval}) if test -n "$CMUCL"; then AC_MSG_RESULT($CMUCL) else AC_PATH_PROG(CMUCL, cmucl) if test -z "$CMUCL"; then AC_MSG_CHECKING(for lisp) # CMUCL can also be installed under `lisp' exec name AC_PATH_PROG(CMUCL, lisp) fi if test -z "$CMUCL"; then AC_MSG_WARN([ CMUCL was not found in your PATH. It is used by the WebStat module to produce statistics about Invenio usage. (Alternatively, CLISP or SBCL can be used instead of CMUCL.) You can continue without it but you will miss this feature. We recommend you to install it first (if you don't have neither CLISP nor SBCL) and to rerun the configure script. 
CMUCL is available from .]) fi fi ## Check for SBCL: AC_MSG_CHECKING(for sbcl) AC_ARG_WITH(sbcl, AC_HELP_STRING([--with-sbcl], [path to a specific SBCL binary (optional)]), SBCL=${withval}) if test -n "$SBCL"; then AC_MSG_RESULT($SBCL) else AC_PATH_PROG(SBCL, sbcl) if test -z "$SBCL"; then AC_MSG_WARN([ SBCL was not found in your PATH. It is used by the WebStat module to produce statistics about Invenio usage. (Alternatively, CLISP or CMUCL can be used instead of SBCL.) You can continue without it but you will miss this feature. We recommend you to install it first (if you don't have neither CLISP nor CMUCL) and to rerun the configure script. SBCL is available from .]) fi fi ## Check for gnuplot: AC_PATH_PROG(GNUPLOT, gnuplot) if test -z "$GNUPLOT"; then AC_MSG_WARN([ Gnuplot was not found in your PATH. It is used by the BibRank module to produce graphs about download and citation history. You can continue without it but you will miss these graphs. We recommend you to install it first and to rerun the configure script. Gnuplot is available from .]) fi ## Check for ffmpeg: AC_PATH_PROG(FFMPEG, ffmpeg) AC_PATH_PROG(FFPROBE, ffprobe) if test -z "$FFMPEG"; then AC_MSG_WARN([ FFmpeg was not found in your PATH. It is used by the BibEncode module to for video encoding. You can continue without but you will not be able to use BibEncode and no video submission workflows are thereby possible. We recommend you to install it first if you would like to support video submissions and to rerun the configure script. FFmpeg is available from .]) fi ## Check for mediainfo: AC_PATH_PROG(MEDIAINFO, mediainfo) if test -z "$MEDIAINFO"; then AC_MSG_WARN([ Mediainfo was not found in your PATH. It is used by the BibEncode module to for video encoding and media metadata handling. You can continue without but you will not be able to use BibEncode and no video submission workflows are thereby possible. 
We recommend you to install it first if you would like to support video submissions and to rerun the configure script. Mediainfo is available from .]) fi ## Check for ffmpeg ## Substitute variables: AC_SUBST(VERSION) AC_SUBST(OPENOFFICE_PYTHON) AC_SUBST(MYSQL) AC_SUBST(PYTHON) AC_SUBST(GZIP) AC_SUBST(GUNZIP) AC_SUBST(TAR) AC_SUBST(WGET) AC_SUBST(MD5SUM) AC_SUBST(PS2PDF) AC_SUBST(GS) AC_SUBST(PDFTOTEXT) AC_SUBST(PDFTK) AC_SUBST(PDF2PS) AC_SUBST(PDFTOPS) AC_SUBST(PDFOPT) AC_SUBST(PDFTOPPM) AC_SUBST(OCROSCRIPT) AC_SUBST(PSTOTEXT) AC_SUBST(PSTOASCII) AC_SUBST(ANY2DJVU) AC_SUBST(DJVUPS) AC_SUBST(DJVUTXT) AC_SUBST(FILE) AC_SUBST(CONVERT) AC_SUBST(GNUPLOT) AC_SUBST(CLISP) AC_SUBST(CMUCL) AC_SUBST(SBCL) AC_SUBST(CACHEDIR) AC_SUBST(FFMPEG) AC_SUBST(MEDIAINFO) AC_SUBST(FFPROBE) AC_SUBST(localstatedir, `eval echo "${localstatedir}"`) ## Define output files: AC_CONFIG_FILES([config.nice \ Makefile \ po/Makefile.in \ config/Makefile \ config/invenio-autotools.conf \ modules/Makefile \ modules/webauthorprofile/Makefile \ modules/webauthorprofile/lib/Makefile \ modules/webauthorprofile/bin/Makefile \ modules/webauthorprofile/bin/webauthorprofile \ modules/bibauthorid/Makefile \ modules/bibauthorid/bin/Makefile \ modules/bibauthorid/bin/bibauthorid \ modules/bibauthorid/doc/Makefile \ modules/bibauthorid/doc/admin/Makefile \ modules/bibauthorid/doc/hacking/Makefile \ modules/bibauthorid/lib/Makefile \ modules/bibauthorid/etc/Makefile \ modules/bibauthorid/etc/name_authority_files/Makefile \ modules/bibauthorid/web/Makefile \ modules/bibcatalog/Makefile \ modules/bibcatalog/doc/Makefile \ modules/bibcatalog/doc/admin/Makefile \ modules/bibcatalog/doc/hacking/Makefile modules/bibcatalog/lib/Makefile \ modules/bibcheck/Makefile \ modules/bibcheck/doc/Makefile \ modules/bibcheck/doc/admin/Makefile \ modules/bibcheck/doc/hacking/Makefile \ modules/bibcheck/etc/Makefile \ modules/bibcheck/web/Makefile \ modules/bibcheck/web/admin/Makefile \ modules/bibcirculation/Makefile \ 
modules/bibcirculation/bin/Makefile \ modules/bibcirculation/doc/Makefile \ modules/bibcirculation/doc/admin/Makefile \ modules/bibcirculation/doc/hacking/Makefile modules/bibcirculation/lib/Makefile \ modules/bibcirculation/web/Makefile \ modules/bibcirculation/web/admin/Makefile \ modules/bibclassify/Makefile \ modules/bibclassify/bin/Makefile \ modules/bibclassify/bin/bibclassify \ modules/bibclassify/doc/Makefile \ modules/bibclassify/doc/admin/Makefile \ modules/bibclassify/doc/hacking/Makefile \ modules/bibclassify/etc/Makefile \ modules/bibclassify/lib/Makefile \ modules/bibconvert/Makefile \ modules/bibconvert/bin/Makefile \ modules/bibconvert/bin/bibconvert \ modules/bibconvert/doc/Makefile \ modules/bibconvert/doc/admin/Makefile \ modules/bibconvert/doc/hacking/Makefile \ modules/bibconvert/etc/Makefile \ modules/bibconvert/lib/Makefile \ + modules/bibdocfile/Makefile \ + modules/bibdocfile/bin/bibdocfile \ + modules/bibdocfile/bin/Makefile \ + modules/bibdocfile/doc/Makefile \ + modules/bibdocfile/doc/hacking/Makefile \ + modules/bibdocfile/lib/Makefile \ modules/bibrecord/Makefile \ modules/bibrecord/bin/Makefile \ modules/bibrecord/bin/xmlmarc2textmarc \ modules/bibrecord/bin/textmarc2xmlmarc \ modules/bibrecord/bin/xmlmarclint \ modules/bibrecord/doc/Makefile \ modules/bibrecord/doc/admin/Makefile \ modules/bibrecord/doc/hacking/Makefile \ modules/bibrecord/etc/Makefile \ modules/bibrecord/lib/Makefile \ modules/refextract/Makefile \ modules/refextract/bin/Makefile \ modules/refextract/bin/refextract \ modules/refextract/doc/Makefile \ modules/refextract/doc/admin/Makefile \ modules/refextract/doc/hacking/Makefile \ modules/refextract/etc/Makefile \ modules/refextract/lib/Makefile \ modules/bibedit/Makefile \ modules/bibedit/bin/Makefile \ modules/bibedit/bin/bibedit \ modules/bibedit/doc/Makefile \ modules/bibedit/doc/admin/Makefile \ modules/bibedit/doc/hacking/Makefile \ modules/bibedit/etc/Makefile \ modules/bibedit/lib/Makefile \ 
modules/bibedit/web/Makefile \ modules/bibencode/Makefile \ modules/bibencode/bin/Makefile \ modules/bibencode/bin/bibencode \ modules/bibencode/lib/Makefile \ modules/bibencode/etc/Makefile \ modules/bibencode/www/Makefile \ modules/bibexport/Makefile \ modules/bibexport/bin/Makefile \ modules/bibexport/bin/bibexport \ modules/bibexport/doc/Makefile \ modules/bibexport/doc/admin/Makefile \ modules/bibexport/doc/hacking/Makefile modules/bibexport/etc/Makefile \ modules/bibexport/lib/Makefile \ modules/bibexport/web/Makefile \ modules/bibexport/web/admin/Makefile \ modules/bibformat/Makefile \ modules/bibformat/bin/Makefile \ modules/bibformat/bin/bibreformat \ modules/bibformat/doc/Makefile \ modules/bibformat/doc/admin/Makefile \ modules/bibformat/doc/hacking/Makefile \ modules/bibformat/etc/Makefile \ modules/bibformat/etc/format_templates/Makefile \ modules/bibformat/etc/output_formats/Makefile \ modules/bibformat/lib/Makefile \ modules/bibformat/lib/elements/Makefile \ modules/bibformat/web/Makefile \ modules/bibformat/web/admin/Makefile \ modules/oaiharvest/Makefile \ modules/oaiharvest/bin/Makefile \ modules/oaiharvest/bin/oaiharvest \ modules/oaiharvest/doc/Makefile \ modules/oaiharvest/doc/admin/Makefile \ modules/oaiharvest/doc/hacking/Makefile \ modules/oaiharvest/lib/Makefile \ modules/oaiharvest/web/Makefile \ modules/oaiharvest/web/admin/Makefile \ modules/oairepository/Makefile \ modules/oairepository/bin/Makefile \ modules/oairepository/bin/oairepositoryupdater \ modules/oairepository/doc/Makefile \ modules/oairepository/doc/admin/Makefile \ modules/oairepository/doc/hacking/Makefile \ modules/oairepository/etc/Makefile \ modules/oairepository/lib/Makefile \ modules/oairepository/web/Makefile \ modules/oairepository/web/admin/Makefile \ modules/bibindex/Makefile \ modules/bibindex/bin/Makefile \ modules/bibindex/bin/bibindex \ modules/bibindex/bin/bibstat \ modules/bibindex/doc/Makefile \ modules/bibindex/doc/admin/Makefile \ 
modules/bibindex/doc/hacking/Makefile \ modules/bibindex/lib/Makefile \ modules/bibindex/web/Makefile \ modules/bibindex/web/admin/Makefile \ modules/bibknowledge/Makefile \ modules/bibknowledge/lib/Makefile \ modules/bibknowledge/doc/Makefile \ modules/bibknowledge/doc/admin/Makefile \ modules/bibknowledge/doc/hacking/Makefile \ modules/bibmatch/Makefile \ modules/bibmatch/bin/Makefile \ modules/bibmatch/bin/bibmatch \ modules/bibmatch/doc/Makefile \ modules/bibmatch/doc/admin/Makefile \ modules/bibmatch/doc/hacking/Makefile \ modules/bibmatch/etc/Makefile \ modules/bibmatch/lib/Makefile \ modules/bibmerge/Makefile \ modules/bibmerge/bin/Makefile \ modules/bibmerge/doc/Makefile \ modules/bibmerge/doc/admin/Makefile \ modules/bibmerge/doc/hacking/Makefile \ modules/bibmerge/lib/Makefile \ modules/bibmerge/web/Makefile \ modules/bibmerge/web/admin/Makefile \ modules/bibrank/Makefile \ modules/bibrank/bin/Makefile \ modules/bibrank/bin/bibrank \ modules/bibrank/bin/bibrankgkb \ modules/bibrank/doc/Makefile \ modules/bibrank/doc/admin/Makefile \ modules/bibrank/doc/hacking/Makefile \ modules/bibrank/etc/Makefile \ modules/bibrank/etc/bibrankgkb.cfg \ modules/bibrank/etc/demo_jif.cfg \ modules/bibrank/etc/template_single_tag_rank_method.cfg \ modules/bibrank/lib/Makefile \ modules/bibrank/web/Makefile \ modules/bibrank/web/admin/Makefile \ modules/bibsched/Makefile \ modules/bibsched/bin/Makefile \ modules/bibsched/bin/bibsched \ modules/bibsched/bin/bibtaskex \ modules/bibsched/bin/bibtasklet \ modules/bibsched/doc/Makefile \ modules/bibsched/doc/admin/Makefile \ modules/bibsched/doc/hacking/Makefile \ modules/bibsched/lib/Makefile \ modules/bibsched/lib/tasklets/Makefile \ modules/bibupload/Makefile \ modules/bibsort/Makefile \ modules/bibsort/bin/Makefile \ modules/bibsort/bin/bibsort \ modules/bibsort/lib/Makefile \ modules/bibsort/etc/Makefile \ modules/bibsort/doc/Makefile \ modules/bibsort/doc/admin/Makefile \ modules/bibsort/doc/hacking/Makefile \ 
modules/bibsort/web/Makefile \ modules/bibsort/web/admin/Makefile \ modules/bibsword/Makefile \ modules/bibsword/bin/Makefile \ modules/bibsword/bin/bibsword \ modules/bibsword/doc/Makefile \ modules/bibsword/doc/admin/Makefile \ modules/bibsword/doc/hacking/Makefile \ modules/bibsword/lib/Makefile \ modules/bibsword/etc/Makefile \ modules/bibupload/bin/Makefile \ modules/bibupload/bin/bibupload \ modules/bibupload/bin/batchuploader \ modules/bibupload/doc/Makefile \ modules/bibupload/doc/admin/Makefile \ modules/bibupload/doc/hacking/Makefile \ modules/bibupload/lib/Makefile \ modules/elmsubmit/Makefile \ modules/elmsubmit/bin/Makefile \ modules/elmsubmit/bin/elmsubmit \ modules/elmsubmit/doc/Makefile \ modules/elmsubmit/doc/admin/Makefile \ modules/elmsubmit/doc/hacking/Makefile \ modules/elmsubmit/etc/Makefile \ modules/elmsubmit/etc/elmsubmit.cfg \ modules/elmsubmit/lib/Makefile \ modules/miscutil/Makefile \ modules/miscutil/bin/Makefile \ modules/miscutil/bin/dbdump \ modules/miscutil/bin/dbexec \ modules/miscutil/bin/inveniocfg \ modules/miscutil/bin/plotextractor \ modules/miscutil/demo/Makefile \ modules/miscutil/doc/Makefile \ modules/miscutil/doc/hacking/Makefile \ modules/miscutil/etc/Makefile \ modules/miscutil/etc/bash_completion.d/Makefile \ modules/miscutil/etc/bash_completion.d/inveniocfg \ modules/miscutil/etc/ckeditor_scientificchar/Makefile \ modules/miscutil/etc/ckeditor_scientificchar/dialogs/Makefile \ modules/miscutil/etc/ckeditor_scientificchar/lang/Makefile \ modules/miscutil/lib/Makefile \ modules/miscutil/sql/Makefile \ modules/miscutil/web/Makefile \ modules/webaccess/Makefile \ modules/webaccess/bin/Makefile \ modules/webaccess/bin/authaction \ modules/webaccess/bin/webaccessadmin \ modules/webaccess/doc/Makefile \ modules/webaccess/doc/admin/Makefile \ modules/webaccess/doc/hacking/Makefile \ modules/webaccess/lib/Makefile \ modules/webaccess/web/Makefile \ modules/webaccess/web/admin/Makefile \ modules/webalert/Makefile \ 
modules/webalert/bin/Makefile \ modules/webalert/bin/alertengine \ modules/webalert/doc/Makefile \ modules/webalert/doc/admin/Makefile \ modules/webalert/doc/hacking/Makefile \ modules/webalert/lib/Makefile \ modules/webalert/web/Makefile \ modules/webbasket/Makefile \ modules/webbasket/doc/Makefile \ modules/webbasket/doc/admin/Makefile \ modules/webbasket/doc/hacking/Makefile \ modules/webbasket/lib/Makefile \ modules/webbasket/web/Makefile \ modules/webcomment/Makefile \ modules/webcomment/doc/Makefile \ modules/webcomment/doc/admin/Makefile \ modules/webcomment/doc/hacking/Makefile \ modules/webcomment/lib/Makefile \ modules/webcomment/web/Makefile \ modules/webcomment/web/admin/Makefile \ modules/webhelp/Makefile \ modules/webhelp/web/Makefile \ modules/webhelp/web/admin/Makefile \ modules/webhelp/web/admin/howto/Makefile \ modules/webhelp/web/hacking/Makefile \ modules/webjournal/Makefile \ modules/webjournal/etc/Makefile \ modules/webjournal/doc/Makefile \ modules/webjournal/doc/admin/Makefile \ modules/webjournal/doc/hacking/Makefile \ modules/webjournal/lib/Makefile \ modules/webjournal/lib/elements/Makefile \ modules/webjournal/lib/widgets/Makefile \ modules/webjournal/web/Makefile \ modules/webjournal/web/admin/Makefile \ modules/webmessage/Makefile \ modules/webmessage/bin/Makefile \ modules/webmessage/bin/webmessageadmin \ modules/webmessage/doc/Makefile \ modules/webmessage/doc/admin/Makefile \ modules/webmessage/doc/hacking/Makefile \ modules/webmessage/lib/Makefile \ modules/webmessage/web/Makefile \ modules/websearch/Makefile \ modules/websearch/bin/Makefile \ modules/websearch/bin/webcoll \ modules/websearch/doc/Makefile \ modules/websearch/doc/admin/Makefile \ modules/websearch/doc/hacking/Makefile \ modules/websearch/lib/Makefile \ modules/websearch/web/Makefile \ modules/websearch/web/admin/Makefile \ modules/websession/Makefile \ modules/websession/bin/Makefile \ modules/websession/bin/inveniogc \ modules/websession/doc/Makefile \ 
modules/websession/doc/admin/Makefile \ modules/websession/doc/hacking/Makefile \ modules/websession/lib/Makefile \ modules/websession/web/Makefile \ modules/webstat/Makefile \ modules/webstat/bin/Makefile \ modules/webstat/bin/webstat \ modules/webstat/bin/webstatadmin \ modules/webstat/doc/Makefile \ modules/webstat/doc/admin/Makefile \ modules/webstat/doc/hacking/Makefile \ modules/webstat/etc/Makefile \ modules/webstat/lib/Makefile \ modules/webstyle/Makefile \ modules/webstyle/bin/Makefile \ modules/webstyle/bin/webdoc \ modules/webstyle/css/Makefile \ modules/webstyle/doc/Makefile \ modules/webstyle/doc/admin/Makefile \ modules/webstyle/doc/hacking/Makefile \ modules/webstyle/etc/Makefile \ modules/webstyle/img/Makefile \ modules/webstyle/lib/Makefile \ modules/websubmit/Makefile \ modules/websubmit/bin/Makefile \ - modules/websubmit/bin/bibdocfile \ modules/websubmit/bin/inveniounoconv \ modules/websubmit/doc/Makefile \ modules/websubmit/doc/admin/Makefile \ modules/websubmit/doc/hacking/Makefile \ modules/websubmit/etc/Makefile \ modules/websubmit/lib/Makefile \ modules/websubmit/lib/functions/Makefile \ modules/websubmit/web/Makefile \ modules/websubmit/web/admin/Makefile \ ]) ## Finally, write output files: AC_OUTPUT ## Write help: AC_MSG_RESULT([****************************************************************************]) AC_MSG_RESULT([** Your Invenio installation is now ready for building. **]) AC_MSG_RESULT([** You have entered the following parameters: **]) AC_MSG_RESULT([** - Invenio main install directory: ${prefix}]) AC_MSG_RESULT([** - Python executable: $PYTHON]) AC_MSG_RESULT([** - MySQL client executable: $MYSQL]) AC_MSG_RESULT([** - CLISP executable: $CLISP]) AC_MSG_RESULT([** - CMUCL executable: $CMUCL]) AC_MSG_RESULT([** - SBCL executable: $SBCL]) AC_MSG_RESULT([** Here are the steps to continue the building process: **]) AC_MSG_RESULT([** 1) Type 'make' to build your Invenio system. 
**]) AC_MSG_RESULT([** 2) Type 'make install' to install your Invenio system. **]) AC_MSG_RESULT([** After that you can start customizing your installation as documented **]) AC_MSG_RESULT([** in the INSTALL file (i.e. edit invenio.conf, run inveniocfg, etc). **]) AC_MSG_RESULT([** Good luck, and thanks for choosing Invenio. **]) AC_MSG_RESULT([** -- Invenio Development Team **]) AC_MSG_RESULT([****************************************************************************]) ## end of file diff --git a/modules/Makefile.am b/modules/Makefile.am index 12c8307dd..94379415e 100644 --- a/modules/Makefile.am +++ b/modules/Makefile.am @@ -1,57 +1,58 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
SUBDIRS = bibauthorid \ bibcatalog \ bibcheck \ bibcirculation \ bibclassify \ bibconvert \ + bibdocfile \ bibedit \ bibencode \ bibexport \ bibformat \ bibindex \ bibknowledge \ bibmatch \ bibmerge \ bibrank \ bibrecord \ bibsched \ bibsort \ bibsword \ bibupload \ elmsubmit \ miscutil \ oaiharvest \ oairepository \ refextract \ webaccess \ webalert \ webauthorprofile \ webbasket \ webcomment \ webhelp \ webjournal \ webmessage \ websearch \ websession \ webstat \ webstyle \ websubmit CLEANFILES = *~ diff --git a/modules/websubmit/bin/bibdocfile.in b/modules/bibdocfile/Makefile.am similarity index 75% copy from modules/websubmit/bin/bibdocfile.in copy to modules/bibdocfile/Makefile.am index 997632dae..baaa01b4a 100644 --- a/modules/websubmit/bin/bibdocfile.in +++ b/modules/bibdocfile/Makefile.am @@ -1,33 +1,20 @@ -#!@PYTHON@ -## ## This file is part of Invenio. -## Copyright (C) 2008, 2010, 2011 CERN. +## Copyright (C) 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -""" -BibDocFile CLI tool. 
-""" - -__revision__ = "$Id$" - -try: - from invenio.bibdocfilecli import main -except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) +SUBDIRS = bin doc lib -main() +CLEANFILES = *~ diff --git a/modules/websubmit/bin/bibdocfile.in b/modules/bibdocfile/bin/Makefile.am similarity index 75% copy from modules/websubmit/bin/bibdocfile.in copy to modules/bibdocfile/bin/Makefile.am index 997632dae..90b4ac814 100644 --- a/modules/websubmit/bin/bibdocfile.in +++ b/modules/bibdocfile/bin/Makefile.am @@ -1,33 +1,22 @@ -#!@PYTHON@ -## ## This file is part of Invenio. -## Copyright (C) 2008, 2010, 2011 CERN. +## Copyright (C) 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -""" -BibDocFile CLI tool. 
-""" - -__revision__ = "$Id$" +CLEANFILES = *~ *.tmp -try: - from invenio.bibdocfilecli import main -except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) +EXTRA_DIST = bibdocfile.in -main() +bin_SCRIPTS = bibdocfile diff --git a/modules/websubmit/bin/bibdocfile.in b/modules/bibdocfile/bin/bibdocfile similarity index 98% copy from modules/websubmit/bin/bibdocfile.in copy to modules/bibdocfile/bin/bibdocfile index 997632dae..2556038de 100644 --- a/modules/websubmit/bin/bibdocfile.in +++ b/modules/bibdocfile/bin/bibdocfile @@ -1,33 +1,33 @@ -#!@PYTHON@ +#!/usr/bin/python ## ## This file is part of Invenio. ## Copyright (C) 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibDocFile CLI tool. 
""" __revision__ = "$Id$" try: from invenio.bibdocfilecli import main except ImportError, e: print "Error: %s" % e import sys sys.exit(1) main() diff --git a/modules/websubmit/bin/bibdocfile.in b/modules/bibdocfile/bin/bibdocfile.in similarity index 100% copy from modules/websubmit/bin/bibdocfile.in copy to modules/bibdocfile/bin/bibdocfile.in diff --git a/modules/websubmit/bin/bibdocfile.in b/modules/bibdocfile/doc/Makefile.am similarity index 75% rename from modules/websubmit/bin/bibdocfile.in rename to modules/bibdocfile/doc/Makefile.am index 997632dae..9046c550c 100644 --- a/modules/websubmit/bin/bibdocfile.in +++ b/modules/bibdocfile/doc/Makefile.am @@ -1,33 +1,20 @@ -#!@PYTHON@ -## ## This file is part of Invenio. -## Copyright (C) 2008, 2010, 2011 CERN. +## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -""" -BibDocFile CLI tool. 
-""" - -__revision__ = "$Id$" - -try: - from invenio.bibdocfilecli import main -except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) +SUBDIRS = hacking -main() +CLEANFILES = *~ *.tmp diff --git a/modules/websubmit/doc/hacking/Makefile.am b/modules/bibdocfile/doc/hacking/Makefile.am similarity index 75% copy from modules/websubmit/doc/hacking/Makefile.am copy to modules/bibdocfile/doc/hacking/Makefile.am index 1512aaef4..08635a17a 100644 --- a/modules/websubmit/doc/hacking/Makefile.am +++ b/modules/bibdocfile/doc/hacking/Makefile.am @@ -1,29 +1,24 @@ ## This file is part of Invenio. ## Copyright (C) 2004, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
webdoclibdir = $(libdir)/webdoc/invenio/hacking -webdoclib_DATA = bibdocfile-api.webdoc \ - websubmit-internals.webdoc \ - websubmit-file-converter.webdoc \ - websubmit-file-stamper.webdoc \ - websubmit-icon-creator.webdoc \ - websubmit-file-metadata.webdoc +webdoclib_DATA = bibdocfile-api.webdoc bibdocfile-internals.webdoc EXTRA_DIST = $(webdoclib_DATA) CLEANFILES = *~ *.tmp diff --git a/modules/websubmit/doc/hacking/bibdocfile-api.webdoc b/modules/bibdocfile/doc/hacking/bibdocfile-api.webdoc similarity index 100% rename from modules/websubmit/doc/hacking/bibdocfile-api.webdoc rename to modules/bibdocfile/doc/hacking/bibdocfile-api.webdoc diff --git a/modules/websubmit/doc/hacking/websubmit-internals.webdoc b/modules/bibdocfile/doc/hacking/bibdocfile-internals.webdoc similarity index 71% copy from modules/websubmit/doc/hacking/websubmit-internals.webdoc copy to modules/bibdocfile/doc/hacking/bibdocfile-internals.webdoc index 82388c334..64a1a3fac 100644 --- a/modules/websubmit/doc/hacking/websubmit-internals.webdoc +++ b/modules/bibdocfile/doc/hacking/bibdocfile-internals.webdoc @@ -1,42 +1,33 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
- + This page summarizes all the information suitable to dig inside -the WebSubmit internals. +the BibDocFile internals.
BibDocFile API
Explains the fulltext document management library.
- -
Conversion tools
-
Explains how to convert from a file format to an other, and how to perform OCR.
- -
Stamping fulltextes
-
Explains how to stamp fulltextes.
- -
Icon creation tools
-
Explains how to create icons from fulltextes.
diff --git a/modules/websubmit/doc/hacking/Makefile.am b/modules/bibdocfile/lib/Makefile.am similarity index 60% copy from modules/websubmit/doc/hacking/Makefile.am copy to modules/bibdocfile/lib/Makefile.am index 1512aaef4..071fbf909 100644 --- a/modules/websubmit/doc/hacking/Makefile.am +++ b/modules/bibdocfile/lib/Makefile.am @@ -1,29 +1,32 @@ ## This file is part of Invenio. -## Copyright (C) 2004, 2008, 2009, 2010, 2011 CERN. +## Copyright (C) 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
-webdoclibdir = $(libdir)/webdoc/invenio/hacking +pylibdir = $(libdir)/python/invenio -webdoclib_DATA = bibdocfile-api.webdoc \ - websubmit-internals.webdoc \ - websubmit-file-converter.webdoc \ - websubmit-file-stamper.webdoc \ - websubmit-icon-creator.webdoc \ - websubmit-file-metadata.webdoc +pylib_DATA = bibdocfile_config.py file.py \ + bibdocfile_webinterface.py \ + bibdocfile_templates.py \ + bibdocfile_managedocfiles.py \ + bibdocfile.py \ + bibdocfilecli.py \ + bibdocfile_regression_tests.py -EXTRA_DIST = $(webdoclib_DATA) +noinst_DATA = fulltext_files_migration_kit.py icon_migration_kit.py -CLEANFILES = *~ *.tmp +EXTRA_DIST = $(pylib_DATA) $(noinst_DATA) + +CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/websubmit/lib/bibdocfile.py b/modules/bibdocfile/lib/bibdocfile.py similarity index 93% rename from modules/websubmit/lib/bibdocfile.py rename to modules/bibdocfile/lib/bibdocfile.py index 4714f5036..b04aa9ba3 100644 --- a/modules/websubmit/lib/bibdocfile.py +++ b/modules/bibdocfile/lib/bibdocfile.py @@ -1,4055 +1,4060 @@ ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ This module implements the low-level API for dealing with fulltext files. 
- All the files associated to a I{record} (identified by a I{recid}) can be managed via an instance of the C{BibRecDocs} class. - A C{BibRecDocs} is a wrapper of the list of I{documents} attached to the record. - Each document is represented by an instance of the C{BibDoc} class. - A document is identified by a C{docid} and name (C{docname}). The docname must be unique within the record. A document is the set of all the formats and revisions of a piece of information. - A document has a type called C{doctype} and can have a restriction. - Each physical file, i.e. the concretization of a document into a particular I{version} and I{format} is represented by an instance of the C{BibDocFile} class. - The format is infact the extension of the physical file. - A comment and a description and other information can be associated to a BibDocFile. - A C{bibdoc} is a synonim for a document, while a C{bibdocfile} is a synonim for a physical file. @group Main classes: BibRecDocs,BibDoc,BibDocFile -@group Other classes: BibDocMoreInfo,Md5Folder,InvenioWebSubmitFileError +@group Other classes: BibDocMoreInfo,Md5Folder,InvenioBibDocFileError @group Main functions: decompose_file,stream_file,bibdocfile_*,download_url @group Configuration Variables: CFG_* """ __revision__ = "$Id$" import os import re import shutil import filecmp import time import random import socket import urllib2 import urllib import tempfile import cPickle import base64 import binascii import cgi import sys from warnings import warn if sys.hexversion < 0x2060000: from md5 import md5 else: from hashlib import md5 try: import magic if not hasattr(magic, "open"): raise ImportError CFG_HAS_MAGIC = True except ImportError: CFG_HAS_MAGIC = False ## The above flag controls whether HTTP range requests are supported or not ## when serving static files via Python. This is disabled by default as ## it currently breaks support for opening PDF files on Windows platforms ## using Acrobat reader brower plugin. 
CFG_ENABLE_HTTP_RANGE_REQUESTS = False from datetime import datetime from mimetypes import MimeTypes from thread import get_ident from invenio import webinterface_handler_config as apache ## Let's set a reasonable timeout for URL request (e.g. FFT) socket.setdefaulttimeout(40) if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.shellutils import escape_shell_arg from invenio.dbquery import run_sql, DatabaseError, blob_to_string from invenio.errorlib import register_exception from invenio.bibrecord import record_get_field_instances, \ field_get_subfield_values, field_get_subfield_instances, \ encode_for_xml from invenio.urlutils import create_url, make_user_agent_string from invenio.textutils import nice_size from invenio.access_control_engine import acc_authorize_action from invenio.webuser import collect_user_info from invenio.access_control_admin import acc_is_user_in_role, acc_get_role_id from invenio.access_control_firerole import compile_role_definition, acc_firerole_check_user from invenio.access_control_config import SUPERADMINROLE, CFG_WEBACCESS_WARNING_MSGS from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, \ - CFG_WEBDIR, CFG_WEBSUBMIT_FILEDIR,\ - CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \ - CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL, \ + CFG_WEBDIR, CFG_BIBDOCFILE_FILEDIR,\ + CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \ + CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL, \ CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS, \ CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_PATH_MD5SUM, \ CFG_WEBSUBMIT_STORAGEDIR, \ CFG_BIBDOCFILE_USE_XSENDFILE, \ CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY, \ CFG_SITE_RECORD, \ CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS, \ CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE -from invenio.websubmit_config import CFG_WEBSUBMIT_ICON_SUBFORMAT_RE, \ - CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT +from invenio.bibdocfile_config import 
CFG_BIBDOCFILE_ICON_SUBFORMAT_RE, \ + CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT import invenio.template -websubmit_templates = invenio.template.load('websubmit') -websearch_templates = invenio.template.load('websearch') +bibdocfile_templates = invenio.template.load('bibdocfile') #: block size when performing I/O. CFG_BIBDOCFILE_BLOCK_SIZE = 1024 * 8 #: threshold used do decide when to use Python MD5 of CLI MD5 algorithm. CFG_BIBDOCFILE_MD5_THRESHOLD = 256 * 1024 #: chunks loaded by the Python MD5 algorithm. CFG_BIBDOCFILE_MD5_BUFFER = 1024 * 1024 #: whether to normalize e.g. ".JPEG" and ".jpg" into .jpeg. CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION = False #: flags that can be associated to files. CFG_BIBDOCFILE_AVAILABLE_FLAGS = ( 'PDF/A', 'STAMPED', 'PDFOPT', 'HIDDEN', 'CONVERTED', 'PERFORM_HIDE_PREVIOUS', 'OCRED' ) #: constant used if FFT correct with the obvious meaning. KEEP_OLD_VALUE = 'KEEP-OLD-VALUE' _CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [(re.compile(_regex), _headers) for _regex, _headers in CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS] _mimes = MimeTypes(strict=False) _mimes.suffix_map.update({'.tbz2' : '.tar.bz2'}) _mimes.encodings_map.update({'.bz2' : 'bzip2'}) _magic_cookies = {} def _get_magic_cookies(): """ @return: a tuple of magic object. @rtype: (MAGIC_NONE, MAGIC_COMPRESS, MAGIC_MIME, MAGIC_COMPRESS + MAGIC_MIME) @note: ... not real magic. Just see: man file(1) """ thread_id = get_ident() if thread_id not in _magic_cookies: _magic_cookies[thread_id] = { magic.MAGIC_NONE : magic.open(magic.MAGIC_NONE), magic.MAGIC_COMPRESS : magic.open(magic.MAGIC_COMPRESS), magic.MAGIC_MIME : magic.open(magic.MAGIC_MIME), magic.MAGIC_COMPRESS + magic.MAGIC_MIME : magic.open(magic.MAGIC_COMPRESS + magic.MAGIC_MIME) } for key in _magic_cookies[thread_id].keys(): _magic_cookies[thread_id][key].load() return _magic_cookies[thread_id] def _generate_extensions(): """ Generate the regular expression to match all the known extensions. @return: the regular expression. 
@rtype: regular expression object """ _tmp_extensions = _mimes.encodings_map.keys() + \ _mimes.suffix_map.keys() + \ _mimes.types_map[1].keys() + \ - CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS + CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS extensions = [] for ext in _tmp_extensions: if ext.startswith('.'): extensions.append(ext) else: extensions.append('.' + ext) extensions.sort() extensions.reverse() extensions = set([ext.lower() for ext in extensions]) extensions = '\\' + '$|\\'.join(extensions) + '$' extensions = extensions.replace('+', '\\+') return re.compile(extensions, re.I) #: Regular expression to recognized extensions. _extensions = _generate_extensions() -class InvenioWebSubmitFileError(Exception): +class InvenioBibDocFileError(Exception): """ Exception raised in case of errors related to fulltext files. """ pass -class InvenioBibdocfileUnauthorizedURL(Exception): +class InvenioWebSubmitFileError(InvenioBibDocFileError): """ - Exception raised when one tries to download an unauthorized external URL. + Exception raised in case of errors related to fulltext files. """ + ## NOTE: this is a legacy Exception pass +class InvenioBibdocfileUnauthorizedURL(Exception): + """ + Exception raised in case it is not possible to access a certain URL + """ + pass def file_strip_ext(afile, skip_version=False, only_known_extensions=False, allow_subformat=True): """ Strip in the best way the extension from a filename. >>> file_strip_ext("foo.tar.gz") 'foo' >>> file_strip_ext("foo.buz.gz") 'foo.buz' >>> file_strip_ext("foo.buz") 'foo' >>> file_strip_ext("foo.buz", only_known_extensions=True) 'foo.buz' >>> file_strip_ext("foo.buz;1", skip_version=False, ... only_known_extensions=True) 'foo.buz;1' >>> file_strip_ext("foo.gif;icon") 'foo' >>> file_strip_ext("foo.gif:icon", allow_subformat=False) 'foo.gif:icon' @param afile: the path/name of a file. @type afile: string @param skip_version: whether to skip a trailing ";version". 
@type skip_version: bool @param only_known_extensions: whether to strip out only known extensions or to consider as extension anything that follows a dot. @type only_known_extensions: bool @param allow_subformat: whether to consider also subformats as part of the extension. @type allow_subformat: bool @return: the name/path without the extension (and version). @rtype: string """ if skip_version or allow_subformat: afile = afile.split(';')[0] nextfile = _extensions.sub('', afile) if nextfile == afile and not only_known_extensions: nextfile = os.path.splitext(afile)[0] while nextfile != afile: afile = nextfile nextfile = _extensions.sub('', afile) return nextfile def normalize_format(format, allow_subformat=True): """ Normalize the format, e.g. by adding a dot in front. @param format: the format/extension to be normalized. @type format: string @param allow_subformat: whether to consider also subformats as part of the extension. @type allow_subformat: bool @return: the normalized format. @rtype; string """ if allow_subformat: subformat = format[format.rfind(';'):] format = format[:format.rfind(';')] else: subformat = '' if format and format[0] != '.': format = '.' + format if CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION: if format not in ('.Z', '.H', '.C', '.CC'): format = format.lower() format = { '.jpg' : '.jpeg', '.htm' : '.html', '.tif' : '.tiff' }.get(format, format) return format + subformat def guess_format_from_url(url): """ Given a URL tries to guess it's extension. Different method will be used, including HTTP HEAD query, downloading the resource and using mime @param url: the URL for which the extension shuld be guessed. @type url: string @return: the recognized extension or empty string if it's impossible to recognize it. 
@rtype: string """ ## Let's try to guess the extension by considering the URL as a filename ext = decompose_file(url, skip_version=True, only_known_extensions=True)[2] if ext.startswith('.'): return ext if is_url_a_local_file(url) and CFG_HAS_MAGIC: ## if the URL corresponds to a local file, let's try to use ## the Python magic library to guess it try: magic_cookie = _get_magic_cookies()[magic.MAGIC_MIME] mimetype = magic_cookie.file(url) ext = _mimes.guess_extension(mimetype) if ext: return normalize_format(ext) except Exception: pass else: ## Since the URL is remote, let's try to perform a HEAD request ## and see the corresponding headers try: response = open_url(url, head_request=True) except (InvenioBibdocfileUnauthorizedURL, urllib2.URLError): return "" format = get_format_from_http_response(response) if format: return format if CFG_HAS_MAGIC: ## Last solution: let's download the remote resource ## and use the Python magic library to guess the extension try: filename = download_url(url, format='') magic_cookie = _get_magic_cookies()[magic.MAGIC_MIME] mimetype = magic_cookie.file(filename) os.remove(filename) ext = _mimes.guess_extension(mimetype) if ext: return normalize_format(ext) except Exception: pass return "" _docname_re = re.compile(r'[^-\w.]*') def normalize_docname(docname): """ Normalize the docname. At the moment the normalization is just returning the same string. @param docname: the docname to be normalized. @type docname: string @return: the normalized docname. @rtype: string """ #return _docname_re.sub('', docname) return docname def normalize_version(version): """ Normalize the version. The version can be either an integer or the keyword 'all'. Any other value will be transformed into the empty string. @param version: the version (either a number or 'all'). @type version: integer or string @return: the normalized version. 
@rtype: string """ try: int(version) except ValueError: if version.lower().strip() == 'all': return 'all' else: return '' return str(version) def compose_file(dirname, docname, extension, subformat=None, version=None): """ Construct back a fullpath given the separate components. """ if version: version = ";%i" % int(version) else: version = "" if subformat: if not subformat.startswith(";"): subformat = ";%s" % subformat else: subformat = "" if extension and not extension.startswith("."): extension = ".%s" % extension return os.path.join(dirname, docname + extension + subformat + version) def compose_format(extension, subformat=None): """ Construct the format string """ if not extension.startswith("."): extension = ".%s" % extension if subformat: if not subformat.startswith(";"): subformat = ";%s" % subformat else: subformat = "" return extension + subformat def decompose_file(afile, skip_version=False, only_known_extensions=False, allow_subformat=True): """ Decompose a file/path into its components dirname, basename and extension. >>> decompose_file('/tmp/foo.tar.gz') ('/tmp', 'foo', '.tar.gz') >>> decompose_file('/tmp/foo.tar.gz;1', skip_version=True) ('/tmp', 'foo', '.tar.gz') >>> decompose_file('http://www.google.com/index.html') ('http://www.google.com', 'index', '.html') @param afile: the path/name of a file. @type afile: string @param skip_version: whether to skip a trailing ";version". @type skip_version: bool @param only_known_extensions: whether to strip out only known extensions or to consider as extension anything that follows a dot. @type only_known_extensions: bool @param allow_subformat: whether to consider also subformats as part of the extension. @type allow_subformat: bool @return: a tuple with the directory name, the docname and extension. @rtype: (dirname, docname, extension) @note: if a URL is provided, the scheme will be part of the dirname. @see: L{file_strip_ext} for the algorithm used to retrieve the extension. 
""" if skip_version: version = afile.split(';')[-1] try: int(version) afile = afile[:-len(version)-1] except ValueError: pass basename = os.path.basename(afile) dirname = afile[:-len(basename)-1] base = file_strip_ext( basename, only_known_extensions=only_known_extensions, allow_subformat=allow_subformat) extension = basename[len(base) + 1:] if extension: extension = '.' + extension return (dirname, base, extension) def decompose_file_with_version(afile): """ Decompose a file into dirname, basename, extension and version. >>> decompose_file_with_version('/tmp/foo.tar.gz;1') ('/tmp', 'foo', '.tar.gz', 1) @param afile: the path/name of a file. @type afile: string @return: a tuple with the directory name, the docname, extension and version. @rtype: (dirname, docname, extension, version) @raise ValueError: in case version does not exist it will. @note: if a URL is provided, the scheme will be part of the dirname. """ version_str = afile.split(';')[-1] version = int(version_str) afile = afile[:-len(version_str)-1] basename = os.path.basename(afile) dirname = afile[:-len(basename)-1] base = file_strip_ext(basename) extension = basename[len(base) + 1:] if extension: extension = '.' + extension return (dirname, base, extension, version) def get_subformat_from_format(format): """ @return the subformat if any. @rtype: string >>> get_superformat_from_format('foo;bar') 'bar' >>> get_superformat_from_format('foo') '' """ try: return format[format.rindex(';') + 1:] except ValueError: return '' def get_superformat_from_format(format): """ @return the superformat if any. @rtype: string >>> get_superformat_from_format('foo;bar') 'foo' >>> get_superformat_from_format('foo') 'foo' """ try: return format[:format.rindex(';')] except ValueError: return format def propose_next_docname(docname): """ Given a I{docname}, suggest a new I{docname} (useful when trying to generate a unique I{docname}). 
>>> propose_next_docname('foo') 'foo_1' >>> propose_next_docname('foo_1') 'foo_2' >>> propose_next_docname('foo_10') 'foo_11' @param docname: the base docname. @type docname: string @return: the next possible docname based on the given one. @rtype: string """ if '_' in docname: split_docname = docname.split('_') try: split_docname[-1] = str(int(split_docname[-1]) + 1) docname = '_'.join(split_docname) except ValueError: docname += '_1' else: docname += '_1' return docname class BibRecDocs: """ This class represents all the files attached to one record. @param recid: the record identifier. @type recid: integer @param deleted_too: whether to consider deleted documents as normal documents (useful when trying to recover deleted information). @type deleted_too: bool @param human_readable: whether numbers should be printed in human readable format (e.g. 2048 bytes -> 2Kb) @ivar id: the record identifier as passed to the constructor. @type id: integer @ivar human_readable: the human_readable flag as passed to the constructor. @type human_readable: bool @ivar deleted_too: the deleted_too flag as passed to the constructor. @type deleted_too: bool @ivar bibdocs: the list of documents attached to the record. @type bibdocs: list of BibDoc """ def __init__(self, recid, deleted_too=False, human_readable=False): try: self.id = int(recid) except ValueError: raise ValueError("BibRecDocs: recid is %s but must be an integer." % repr(recid)) self.human_readable = human_readable self.deleted_too = deleted_too self.bibdocs = [] self.build_bibdoc_list() def __repr__(self): """ @return: the canonical string representation of the C{BibRecDocs}. @rtype: string """ return 'BibRecDocs(%s%s%s)' % (self.id, self.deleted_too and ', True' or '', self.human_readable and ', True' or '' ) def __str__(self): """ @return: an easy to be I{grepped} string representation of the whole C{BibRecDocs} content. 
@rtype: string """ out = '%i::::total bibdocs attached=%i\n' % (self.id, len(self.bibdocs)) out += '%i::::total size latest version=%s\n' % (self.id, nice_size(self.get_total_size_latest_version())) out += '%i::::total size all files=%s\n' % (self.id, nice_size(self.get_total_size())) for bibdoc in self.bibdocs: out += str(bibdoc) return out def empty_p(self): """ @return: True when the record has no attached documents. @rtype: bool """ return len(self.bibdocs) == 0 def deleted_p(self): """ @return: True if the corresponding record has been deleted. @rtype: bool """ from invenio.search_engine import record_exists return record_exists(self.id) == -1 def get_xml_8564(self): """ Return a snippet of I{MARCXML} representing the I{8564} fields corresponding to the current state. @return: the MARCXML representation. @rtype: string """ from invenio.search_engine import get_record out = '' record = get_record(self.id) fields = record_get_field_instances(record, '856', '4', ' ') for field in fields: urls = field_get_subfield_values(field, 'u') if urls and not bibdocfile_url_p(urls[0]): out += '\t\n' for subfield, value in field_get_subfield_instances(field): out += '\t\t%s\n' % (subfield, encode_for_xml(value)) out += '\t\n' for afile in self.list_latest_files(list_hidden=False): out += '\t\n' url = afile.get_url() description = afile.get_description() comment = afile.get_comment() if url: out += '\t\t%s\n' % encode_for_xml(url) if description: out += '\t\t%s\n' % encode_for_xml(description) if comment: out += '\t\t%s\n' % encode_for_xml(comment) out += '\t\n' return out def get_total_size_latest_version(self): """ Returns the total size used on disk by all the files belonging to this record and corresponding to the latest version. @return: the total size. 
@rtype: integer """ size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size_latest_version() return size def get_total_size(self): """ Return the total size used on disk of all the files belonging to this record of any version (not only the last as in L{get_total_size_latest_version}). @return: the total size. @rtype: integer """ size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size() return size def build_bibdoc_list(self): """ This method must be called everytime a I{bibdoc} is added, removed or modified. """ self.bibdocs = [] if self.deleted_too: res = run_sql("""SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s ORDER BY docname ASC""", (self.id,)) else: res = run_sql("""SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s AND status<>'DELETED' ORDER BY docname ASC""", (self.id,)) for row in res: cur_doc = BibDoc(docid=row[0], recid=self.id, doctype=row[1], human_readable=self.human_readable) self.bibdocs.append(cur_doc) def list_bibdocs(self, doctype=''): """ Returns the list all bibdocs object belonging to a recid. If C{doctype} is set, it returns just the bibdocs of that doctype. @param doctype: the optional doctype. @type doctype: string @return: the list of bibdocs. @rtype: list of BibDoc """ if not doctype: return self.bibdocs else: return [bibdoc for bibdoc in self.bibdocs if doctype == bibdoc.doctype] def get_bibdoc_names(self, doctype=''): """ Returns all the names of the documents associated with the bibdoc. If C{doctype} is set, restrict the result to all the matching doctype. @param doctype: the optional doctype. @type doctype: string @return: the list of document names. @rtype: list of string """ return [bibdoc.docname for bibdoc in self.list_bibdocs(doctype)] def propose_unique_docname(self, docname): """ Given C{docname}, return a new docname that is not already attached to the record. @param docname: the reference docname. 
@type docname: string @return: a docname not already attached. @rtype: string """ docname = normalize_docname(docname) goodname = docname i = 1 while goodname in self.get_bibdoc_names(): i += 1 goodname = "%s_%s" % (docname, i) return goodname def merge_bibdocs(self, docname1, docname2): """ This method merge C{docname2} into C{docname1}. 1. Given all the formats of the latest version of the files attached to C{docname2}, these files are added as new formats into C{docname1}. 2. C{docname2} is marked as deleted. - @raise InvenioWebSubmitFileError: if at least one format in C{docname2} + @raise InvenioBibDocFileError: if at least one format in C{docname2} already exists in C{docname1}. (In this case the two bibdocs are preserved) @note: comments and descriptions are also copied. @note: if C{docname2} has a I{restriction}(i.e. if the I{status} is set) and C{docname1} doesn't, the restriction is imported. """ bibdoc1 = self.get_bibdoc(docname1) bibdoc2 = self.get_bibdoc(docname2) ## Check for possibility for bibdocfile in bibdoc2.list_latest_files(): format = bibdocfile.get_format() if bibdoc1.format_already_exists_p(format): - raise InvenioWebSubmitFileError('Format %s already exists in bibdoc %s of record %s. It\'s impossible to merge bibdoc %s into it.' % (format, docname1, self.id, docname2)) + raise InvenioBibDocFileError('Format %s already exists in bibdoc %s of record %s. It\'s impossible to merge bibdoc %s into it.' % (format, docname1, self.id, docname2)) ## Importing restriction if needed. 
restriction1 = bibdoc1.get_status() restriction2 = bibdoc2.get_status() if restriction2 and not restriction1: bibdoc1.set_status(restriction2) ## Importing formats for bibdocfile in bibdoc2.list_latest_files(): format = bibdocfile.get_format() comment = bibdocfile.get_comment() description = bibdocfile.get_description() bibdoc1.add_file_new_format(bibdocfile.get_full_path(), description=description, comment=comment, format=format) ## Finally deleting old bibdoc2 bibdoc2.delete() self.build_bibdoc_list() def get_docid(self, docname): """ @param docname: the document name. @type docname: string @return: the identifier corresponding to the given C{docname}. @rtype: integer - @raise InvenioWebSubmitFileError: if the C{docname} does not + @raise InvenioBibDocFileError: if the C{docname} does not corresponds to a document attached to this record. """ for bibdoc in self.bibdocs: if bibdoc.docname == docname: return bibdoc.id - raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \ + raise InvenioBibDocFileError, "Recid '%s' is not connected with a " \ "docname '%s'" % (self.id, docname) def get_docname(self, docid): """ @param docid: the document identifier. @type docid: integer @return: the name of the document corresponding to the given document identifier. @rtype: string - @raise InvenioWebSubmitFileError: if the C{docid} does not + @raise InvenioBibDocFileError: if the C{docid} does not corresponds to a document attached to this record. """ for bibdoc in self.bibdocs: if bibdoc.id == docid: return bibdoc.docname - raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \ + raise InvenioBibDocFileError, "Recid '%s' is not connected with a " \ "docid '%s'" % (self.id, docid) def has_docname_p(self, docname): """ @param docname: the document name, @type docname: string @return: True if a document with the given name is attached to this record. 
@rtype: bool """ for bibdoc in self.bibdocs: if bibdoc.docname == docname: return True return False def get_bibdoc(self, docname): """ @return: the bibdoc with a particular docname associated with this recid""" for bibdoc in self.bibdocs: if bibdoc.docname == docname: return bibdoc - raise InvenioWebSubmitFileError, "Recid '%s' is not connected with " \ + raise InvenioBibDocFileError, "Recid '%s' is not connected with " \ " docname '%s'" % (self.id, docname) def delete_bibdoc(self, docname): """ Deletes the document with the specified I{docname}. @param docname: the document name. @type docname: string """ for bibdoc in self.bibdocs: if bibdoc.docname == docname: bibdoc.delete() self.build_bibdoc_list() def add_bibdoc(self, doctype="Main", docname='file', never_fail=False): """ Add a new empty document object (a I{bibdoc}) to the list of documents of this record. @param doctype: the document type. @type doctype: string @param docname: the document name. @type docname: string @param never_fail: if True, this procedure will not fail, even if a document with the given name is already attached to this record. In this case a new name will be generated (see L{propose_unique_docname}). @type never_fail: bool @return: the newly created document object. @rtype: BibDoc - @raise InvenioWebSubmitFileError: in case of any error. + @raise InvenioBibDocFileError: in case of any error. 
""" try: docname = normalize_docname(docname) if never_fail: docname = self.propose_unique_docname(docname) if docname in self.get_bibdoc_names(): - raise InvenioWebSubmitFileError, "%s has already a bibdoc with docname %s" % (self.id, docname) + raise InvenioBibDocFileError, "%s has already a bibdoc with docname %s" % (self.id, docname) else: bibdoc = BibDoc(recid=self.id, doctype=doctype, docname=docname, human_readable=self.human_readable) self.build_bibdoc_list() return bibdoc except Exception, e: register_exception() - raise InvenioWebSubmitFileError(str(e)) + raise InvenioBibDocFileError(str(e)) def add_new_file(self, fullpath, doctype="Main", docname=None, never_fail=False, description=None, comment=None, format=None, flags=None, modification_date=None): """ Directly add a new file to this record. Adds a new file with the following policy: - if the C{docname} is not set it is retrieved from the name of the file. - If a bibdoc with the given docname doesn't already exist, it is created and the file is added to it. - It it exist but it doesn't contain the format that is being added, the new format is added. - If the format already exists then if C{never_fail} is True a new bibdoc is created with a similar name but with a progressive number as a suffix and the file is added to it (see L{propose_unique_docname}). @param fullpath: the filesystme path of the document to be added. @type fullpath: string @param doctype: the type of the document. @type doctype: string @param docname: the document name. @type docname: string @param never_fail: if True, this procedure will not fail, even if a document with the given name is already attached to this record. In this case a new name will be generated (see L{propose_unique_docname}). @type never_fail: bool @param description: an optional description of the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. 
If not specified it will be guessed (see L{guess_format_from_url}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @return: the elaborated document object. @rtype: BibDoc - @raise InvenioWebSubmitFileError: in case of error. + @raise InvenioBibDocFileError: in case of error. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] docname = normalize_docname(docname) try: bibdoc = self.get_bibdoc(docname) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: # bibdoc doesn't already exists! bibdoc = self.add_bibdoc(doctype, docname, False) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, flags=flags, modification_date=modification_date) self.build_bibdoc_list() else: try: bibdoc.add_file_new_format(fullpath, description=description, comment=comment, format=format, flags=flags, modification_date=modification_date) self.build_bibdoc_list() - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Format already exist! if never_fail: bibdoc = self.add_bibdoc(doctype, docname, True) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, flags=flags, modification_date=modification_date) self.build_bibdoc_list() else: raise return bibdoc def add_new_version(self, fullpath, docname=None, description=None, comment=None, format=None, flags=None): """ Adds a new file to an already existent document object as a new version. @param fullpath: the filesystem path of the file to be added. @type fullpath: string @param docname: the document name. If not specified it will be extracted from C{fullpath} (see L{decompose_file}). @type docname: string @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. 
@type comment: string @param format: the extension of the file. If not specified it will be guessed (see L{guess_format_from_url}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @return: the elaborated document object. @rtype: BibDoc - @raise InvenioWebSubmitFileError: in case of error. + @raise InvenioBibDocFileError: in case of error. @note: previous files associated with the same document will be considered obsolete. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, flags=flags) self.build_bibdoc_list() return bibdoc def add_new_format(self, fullpath, docname=None, description=None, comment=None, format=None, flags=None, modification_date=None): """ Adds a new file to an already existent document object as a new format. @param fullpath: the filesystem path of the file to be added. @type fullpath: string @param docname: the document name. If not specified it will be extracted from C{fullpath} (see L{decompose_file}). @type docname: string @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be guessed (see L{guess_format_from_url}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @return: the elaborated document object. 
@rtype: BibDoc - @raise InvenioWebSubmitFileError: in case the same format already + @raise InvenioBibDocFileError: in case the same format already exists. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_format(fullpath, description=description, comment=comment, format=format, flags=flags, modification_date=modification_date) self.build_bibdoc_list() return bibdoc def list_latest_files(self, doctype='', list_hidden=True): """ Returns a list of the latest files. @param doctype: if set, only document of the given type will be listed. @type doctype: string @param list_hidden: if True, will list also files with the C{HIDDEN} flag being set. @type list_hidden: bool @return: the list of latest files. @rtype: list of BibDocFile """ docfiles = [] for bibdoc in self.list_bibdocs(doctype): docfiles += bibdoc.list_latest_files(list_hidden=list_hidden) return docfiles def display(self, docname="", version="", doctype="", ln=CFG_SITE_LANG, verbose=0, display_hidden=True): """ Returns an HTML representation of the the attached documents. @param docname: if set, include only the requested document. @type docname: string @param version: if not set, only the last version will be displayed. If 'all', all versions will be displayed. @type version: string (integer or 'all') @param doctype: is set, include only documents of the requested type. @type doctype: string @param ln: the language code. @type ln: string @param verbose: if greater than 0, includes debug information. @type verbose: integer @param display_hidden: whether to include hidden files as well. @type display_hidden: bool @return: the formatted representation. 
@rtype: HTML string """ t = "" if docname: try: bibdocs = [self.get_bibdoc(docname)] - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: bibdocs = self.list_bibdocs(doctype) else: bibdocs = self.list_bibdocs(doctype) if bibdocs: types = list_types_from_array(bibdocs) fulltypes = [] for mytype in types: if mytype in ('Plot', 'PlotMisc'): # FIXME: quick hack to ignore plot-like doctypes # on Files tab continue fulltype = { 'name' : mytype, 'content' : [], } for bibdoc in bibdocs: if mytype == bibdoc.get_type(): fulltype['content'].append(bibdoc.display(version, ln=ln, display_hidden=display_hidden)) fulltypes.append(fulltype) if verbose >= 9: verbose_files = str(self) else: verbose_files = '' - t = websubmit_templates.tmpl_bibrecdoc_filelist( + t = bibdocfile_templates.tmpl_bibrecdoc_filelist( ln=ln, types = fulltypes, verbose_files=verbose_files ) return t def fix(self, docname): """ Algorithm that transform a broken/old bibdoc into a coherent one. Think of it as being the fsck of BibDocs. - All the files in the bibdoc directory will be renamed according to the document name. Proper .recid, .type, .md5 files will be created/updated. - In case of more than one file with the same format version a new bibdoc will be created in order to put does files. @param docname: the document name that need to be fixed. @type docname: string @return: the list of newly created bibdocs if any. @rtype: list of BibDoc - @raise InvenioWebSubmitFileError: in case of issues that can not be + @raise InvenioBibDocFileError: in case of issues that can not be fixed automatically. """ bibdoc = self.get_bibdoc(docname) versions = {} res = [] new_bibdocs = [] # List of files with the same version/format of # existing file which need new bibdoc. counter = 0 zero_version_bug = False if os.path.exists(bibdoc.basedir): for filename in os.listdir(bibdoc.basedir): if filename[0] != '.' 
and ';' in filename: name, version = filename.split(';') try: version = int(version) except ValueError: # Strange name register_exception() - raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir) + raise InvenioBibDocFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir) if version == 0: zero_version_bug = True format = name[len(file_strip_ext(name)):] format = normalize_format(format) if not versions.has_key(version): versions[version] = {} new_name = 'FIXING-%s-%s' % (str(counter), name) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name)) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e) + raise InvenioBibDocFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e) if versions[version].has_key(format): new_bibdocs.append((new_name, version)) else: versions[version][format] = new_name counter += 1 elif filename[0] != '.': # Strange name register_exception() - raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir) + raise InvenioBibDocFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file version. 
Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir) else: # we create the corresponding storage directory old_umask = os.umask(022) os.makedirs(bibdoc.basedir) # and save the father record id if it exists try: if self.id != "": recid_fd = open("%s/.recid" % bibdoc.basedir, "w") recid_fd.write(str(self.id)) recid_fd.close() if bibdoc.doctype != "": type_fd = open("%s/.type" % bibdoc.basedir, "w") type_fd.write(str(bibdoc.doctype)) type_fd.close() except Exception, e: register_exception() - raise InvenioWebSubmitFileError, e + raise InvenioBibDocFileError, e os.umask(old_umask) if not versions: bibdoc.delete() else: for version, formats in versions.iteritems(): if zero_version_bug: version += 1 for format, filename in formats.iteritems(): destination = '%s%s;%i' % (docname, format, version) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination)) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e) + raise InvenioBibDocFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e) try: recid_fd = open("%s/.recid" % bibdoc.basedir, "w") recid_fd.write(str(self.id)) recid_fd.close() type_fd = open("%s/.type" % bibdoc.basedir, "w") type_fd.write(str(bibdoc.doctype)) type_fd.close() except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e) + raise InvenioBibDocFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e) self.build_bibdoc_list() res = [] for (filename, version) in new_bibdocs: if zero_version_bug: version += 1 new_bibdoc = self.add_bibdoc(doctype=bibdoc.doctype, docname=docname, never_fail=True) 
new_bibdoc.add_file_new_format('%s/%s' % (bibdoc.basedir, filename), version) res.append(new_bibdoc) try: os.remove('%s/%s' % (bibdoc.basedir, filename)) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e) + raise InvenioBibDocFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e) Md5Folder(bibdoc.basedir).update(only_new=False) bibdoc._build_file_list() self.build_bibdoc_list() for bibdoc in self.bibdocs: if not run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (bibdoc.id,)): ## Import from MARC only if the bibdoc has never had ## its more_info initialized. try: bibdoc.import_descriptions_and_comments_from_marc() except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Error in importing description and comment from %s for record %s: %s" % (repr(bibdoc), self.id, e) + raise InvenioBibDocFileError, "Error in importing description and comment from %s for record %s: %s" % (repr(bibdoc), self.id, e) return res def check_format(self, docname): """ Check for any format related issue. - In case L{CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS} is + In case L{CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS} is altered or Python version changes, it might happen that a docname contains files which are no more docname + .format ; version, simply because the .format is now recognized (and it was not before, so it was contained into the docname). This algorithm verify if it is necessary to fix (seel L{fix_format}). @param docname: the document name whose formats should be verified. @type docname: string @return: True if format is correct. False if a fix is needed. @rtype: bool - @raise InvenioWebSubmitFileError: in case of any error. + @raise InvenioBibDocFileError: in case of any error. 
""" bibdoc = self.get_bibdoc(docname) correct_docname = decompose_file(docname + '.pdf')[1] if docname != correct_docname: return False for filename in os.listdir(bibdoc.basedir): if not filename.startswith('.'): try: dummy, dummy, format, version = decompose_file_with_version(filename) except Exception: - raise InvenioWebSubmitFileError('Incorrect filename "%s" for docname %s for recid %i' % (filename, docname, self.id)) + raise InvenioBibDocFileError('Incorrect filename "%s" for docname %s for recid %i' % (filename, docname, self.id)) if '%s%s;%i' % (correct_docname, format, version) != filename: return False return True def check_duplicate_docnames(self): """ Check wethever the record is connected with at least tho documents with the same name. @return: True if everything is fine. @rtype: bool """ docnames = set() for docname in self.get_bibdoc_names(): if docname in docnames: return False else: docnames.add(docname) return True def uniformize_bibdoc(self, docname): """ This algorithm correct wrong file name belonging to a bibdoc. @param docname: the document name whose formats should be verified. @type docname: string """ bibdoc = self.get_bibdoc(docname) for filename in os.listdir(bibdoc.basedir): if not filename.startswith('.'): try: dummy, dummy, format, version = decompose_file_with_version(filename) except ValueError: register_exception(alert_admin=True, prefix= "Strange file '%s' is stored in %s" % (filename, bibdoc.basedir)) else: os.rename(os.path.join(bibdoc.basedir, filename), os.path.join(bibdoc.basedir, '%s%s;%i' % (docname, format, version))) Md5Folder(bibdoc.basedir).update() bibdoc.touch() bibdoc._build_file_list('rename') def fix_format(self, docname, skip_check=False): """ Fixes format related inconsistencies. @param docname: the document name whose formats should be verified. @type docname: string @param skip_check: if True assume L{check_format} has already been called and the need for fix has already been found. 
If False, will implicitly call L{check_format} and skip fixing if no error is found. @type skip_check: bool @return: in case merging two bibdocs is needed but it's not possible. @rtype: bool """ if not skip_check: if self.check_format(docname): return True bibdoc = self.get_bibdoc(docname) correct_docname = decompose_file(docname + '.pdf')[1] need_merge = False if correct_docname != docname: need_merge = self.has_docname_p(correct_docname) if need_merge: proposed_docname = self.propose_unique_docname(correct_docname) run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (proposed_docname, bibdoc.id)) self.build_bibdoc_list() self.uniformize_bibdoc(proposed_docname) try: self.merge_bibdocs(docname, proposed_docname) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: return False else: run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (correct_docname, bibdoc.id)) self.build_bibdoc_list() self.uniformize_bibdoc(correct_docname) else: self.uniformize_bibdoc(docname) return True def fix_duplicate_docnames(self, skip_check=False): """ Algotirthm to fix duplicate docnames. If a record is connected with at least two bibdoc having the same docname, the algorithm will try to merge them. @param skip_check: if True assume L{check_duplicate_docnames} has already been called and the need for fix has already been found. If False, will implicitly call L{check_duplicate_docnames} and skip fixing if no error is found. @type skip_check: bool """ if not skip_check: if self.check_duplicate_docnames(): return docnames = set() for bibdoc in self.list_bibdocs(): docname = bibdoc.docname if docname in docnames: new_docname = self.propose_unique_docname(bibdoc.docname) bibdoc.change_name(new_docname) self.merge_bibdocs(docname, new_docname) docnames.add(docname) def check_file_exists(self, path, format): """ Check if a file with the same content of the file pointed in C{path} is already attached to this record. @param path: the file to be checked against. 
@type path: string @return: True if a file with the requested content is already attached to the record. @rtype: bool """ # Let's consider all the latest files for bibdoc in self.list_bibdocs(): if bibdoc.check_file_exists(path, format): return True return False class BibDoc: """ This class represents one document (i.e. a set of files with different formats and with versioning information that consitutes a piece of information. To instanciate a new document, the recid and the docname are mandatory. To instanciate an already existing document, either the recid and docname or the docid alone are sufficient to retrieve it. @param docid: the document identifier. @type docid: integer @param recid: the record identifier of the record to which this document belongs to. If the C{docid} is specified the C{recid} is automatically retrieven from the database. @type recid: integer @param docname: the document name. @type docname: string @param doctype: the document type (used when instanciating a new document). @type doctype: string @param human_readable: whether sizes should be represented in a human readable format. @type human_readable: bool - @raise InvenioWebSubmitFileError: in case of error. + @raise InvenioBibDocFileError: in case of error. """ def __init__ (self, docid=None, recid=None, docname=None, doctype='Main', human_readable=False): """Constructor of a bibdoc. 
At least the docid or the recid/docname pair is needed.""" # docid is known, the document already exists if docname: docname = normalize_docname(docname) self.docfiles = [] self.md5s = None self.human_readable = human_readable if docid: if not recid: res = run_sql("SELECT id_bibrec,type FROM bibrec_bibdoc WHERE id_bibdoc=%s LIMIT 1", (docid,), 1) if res: recid = res[0][0] doctype = res[0][1] else: warn("Docid %s is orphan" % docid) else: res = run_sql("SELECT type FROM bibrec_bibdoc WHERE id_bibrec=%s AND id_bibdoc=%s LIMIT 1", (recid, docid,), 1) if res: doctype = res[0][0] else: #this bibdoc isn't associated with the corresponding bibrec. - raise InvenioWebSubmitFileError, "Docid %s is not associated with the recid %s" % (docid, recid) + raise InvenioBibDocFileError, "Docid %s is not associated with the recid %s" % (docid, recid) # gather the other information res = run_sql("SELECT id,status,docname,creation_date,modification_date,text_extraction_date,more_info FROM bibdoc WHERE id=%s LIMIT 1", (docid,), 1) if res: self.cd = res[0][3] self.md = res[0][4] self.td = res[0][5] self.recid = recid self.docname = res[0][2] self.id = docid self.status = res[0][1] self.more_info = BibDocMoreInfo(docid, blob_to_string(res[0][6])) self.basedir = _make_base_dir(self.id) self.doctype = doctype else: # this bibdoc doesn't exist - raise InvenioWebSubmitFileError, "The docid %s does not exist." % docid + raise InvenioBibDocFileError, "The docid %s does not exist." 
% docid # else it is a new document else: if not docname: - raise InvenioWebSubmitFileError, "You should specify the docname when creating a new bibdoc" + raise InvenioBibDocFileError, "You should specify the docname when creating a new bibdoc" else: self.recid = recid self.doctype = doctype self.docname = docname self.status = '' if recid: res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s LIMIT 1", (recid, docname), 1) if res: - raise InvenioWebSubmitFileError("A bibdoc called %s already exists for recid %s" % (docname, recid)) + raise InvenioBibDocFileError("A bibdoc called %s already exists for recid %s" % (docname, recid)) self.id = run_sql("INSERT INTO bibdoc (status,docname,creation_date,modification_date) " "values(%s,%s,NOW(),NOW())", (self.status, docname)) if self.id: # we link the document to the record if a recid was # specified self.more_info = BibDocMoreInfo(self.id) res = run_sql("SELECT creation_date, modification_date, text_extraction_date FROM bibdoc WHERE id=%s", (self.id,)) self.cd = res[0][0] self.md = res[0][1] self.td = res[0][2] else: - raise InvenioWebSubmitFileError, "New docid cannot be created" + raise InvenioBibDocFileError, "New docid cannot be created" try: self.basedir = _make_base_dir(self.id) # we create the corresponding storage directory if not os.path.exists(self.basedir): old_umask = os.umask(022) os.makedirs(self.basedir) # and save the father record id if it exists try: if self.recid: recid_fd = open("%s/.recid" % self.basedir, "w") recid_fd.write(str(self.recid)) recid_fd.close() if self.doctype: type_fd = open("%s/.type" % self.basedir, "w") type_fd.write(str(self.doctype)) type_fd.close() except Exception, e: register_exception(alert_admin=True) - raise InvenioWebSubmitFileError, e + raise InvenioBibDocFileError, e os.umask(old_umask) if self.recid: run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type) VALUES (%s,%s,%s)", (recid, self.id, 
self.doctype,)) except Exception, e: run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, )) run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, )) register_exception(alert_admin=True) - raise InvenioWebSubmitFileError, e + raise InvenioBibDocFileError, e # build list of attached files self._build_file_list('init') def __repr__(self): """ @return: the canonical string representation of the C{BibDoc}. @rtype: string """ return 'BibDoc(%s, %s, %s, %s, %s)' % (repr(self.id), repr(self.recid), repr(self.docname), repr(self.doctype), repr(self.human_readable)) def __str__(self): """ @return: an easy to be I{grepped} string representation of the whole C{BibDoc} content. @rtype: string """ out = '%s:%i:::docname=%s\n' % (self.recid or '', self.id, self.docname) out += '%s:%i:::doctype=%s\n' % (self.recid or '', self.id, self.doctype) out += '%s:%i:::status=%s\n' % (self.recid or '', self.id, self.status) out += '%s:%i:::basedir=%s\n' % (self.recid or '', self.id, self.basedir) out += '%s:%i:::creation date=%s\n' % (self.recid or '', self.id, self.cd) out += '%s:%i:::modification date=%s\n' % (self.recid or '', self.id, self.md) out += '%s:%i:::text extraction date=%s\n' % (self.recid or '', self.id, self.td) out += '%s:%i:::total file attached=%s\n' % (self.recid or '', self.id, len(self.docfiles)) if self.human_readable: out += '%s:%i:::total size latest version=%s\n' % (self.recid or '', self.id, nice_size(self.get_total_size_latest_version())) out += '%s:%i:::total size all files=%s\n' % (self.recid or '', self.id, nice_size(self.get_total_size())) else: out += '%s:%i:::total size latest version=%s\n' % (self.recid or '', self.id, self.get_total_size_latest_version()) out += '%s:%i:::total size all files=%s\n' % (self.recid or '', self.id, self.get_total_size()) for docfile in self.docfiles: out += str(docfile) return out def format_already_exists_p(self, format): """ @param format: a format to be checked. 
@type format: string @return: True if a file of the given format already exists among the latest files. @rtype: bool """ format = normalize_format(format) for afile in self.list_latest_files(): if format == afile.get_format(): return True return False def get_status(self): """ @return: the status information. @rtype: string """ return self.status def get_text(self, version=None): """ @param version: the requested version. If not set, the latest version will be used. @type version: integer @return: the textual content corresponding to the specified version of the document. @rtype: string """ if version is None: version = self.get_latest_version() if self.has_text(version): return open(os.path.join(self.basedir, '.text;%i' % version)).read() else: return "" def get_text_path(self, version=None): """ @param version: the requested version. If not set, the latest version will be used. @type version: int @return: the full path to the textual content corresponding to the specified version of the document. @rtype: string """ if version is None: version = self.get_latest_version() if self.has_text(version): return os.path.join(self.basedir, '.text;%i' % version) else: return "" def extract_text(self, version=None, perform_ocr=False, ln='en'): """ Try what is necessary to extract the textual information of a document. @param version: the version of the document for which text is required. If not specified the text will be retrieved from the last version. @type version: integer @param perform_ocr: whether to perform OCR. @type perform_ocr: bool @param ln: a two letter language code to give as a hint to the OCR procedure. @type ln: string - @raise InvenioWebSubmitFileError: in case of error. + @raise InvenioBibDocFileError: in case of error. @note: the text is extracted and cached for later use. Use L{get_text} to retrieve it. 
""" from invenio.websubmit_file_converter import get_best_format_to_extract_text_from, convert_file, InvenioWebSubmitFileConverterError if version is None: version = self.get_latest_version() docfiles = self.list_version_files(version) ## We try to extract text only from original or OCRed documents. filenames = [docfile.get_full_path() for docfile in docfiles if 'CONVERTED' not in docfile.flags or 'OCRED' in docfile.flags] try: filename = get_best_format_to_extract_text_from(filenames) except InvenioWebSubmitFileConverterError: ## We fall back on considering all the documents filenames = [docfile.get_full_path() for docfile in docfiles] try: filename = get_best_format_to_extract_text_from(filenames) except InvenioWebSubmitFileConverterError: open(os.path.join(self.basedir, '.text;%i' % version), 'w').write('') return try: convert_file(filename, os.path.join(self.basedir, '.text;%i' % version), '.txt', perform_ocr=perform_ocr, ln=ln) if version == self.get_latest_version(): run_sql("UPDATE bibdoc SET text_extraction_date=NOW() WHERE id=%s", (self.id, )) except InvenioWebSubmitFileConverterError, e: register_exception(alert_admin=True, prefix="Error in extracting text from bibdoc %i, version %i" % (self.id, version)) - raise InvenioWebSubmitFileError, str(e) + raise InvenioBibDocFileError, str(e) def touch(self): """ Update the modification time of the bibdoc (as in the UNIX command C{touch}). """ run_sql('UPDATE bibdoc SET modification_date=NOW() WHERE id=%s', (self.id, )) #if self.recid: #run_sql('UPDATE bibrec SET modification_date=NOW() WHERE id=%s', (self.recid, )) def set_status(self, new_status): """ Set a new status. A document with a status information is a restricted document that can be accessed only to user which as an authorization to the I{viewrestrdoc} WebAccess action with keyword status with value C{new_status}. @param new_status: the new status. If empty the document will be unrestricted. 
@type new_status: string - @raise InvenioWebSubmitFileError: in case the reserved word + @raise InvenioBibDocFileError: in case the reserved word 'DELETED' is used. """ if new_status != KEEP_OLD_VALUE: if new_status == 'DELETED': - raise InvenioWebSubmitFileError('DELETED is a reserved word and can not be used for setting the status') + raise InvenioBibDocFileError('DELETED is a reserved word and can not be used for setting the status') run_sql('UPDATE bibdoc SET status=%s WHERE id=%s', (new_status, self.id)) self.status = new_status self.touch() self._build_file_list() def add_file_new_version(self, filename, description=None, comment=None, format=None, flags=None, modification_date=None): """ Add a new version of a file. If no physical file is already attached to the document a the given file will have version 1. Otherwise the new file will have the current version number plus one. @param filename: the local path of the file. @type filename: string @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be retrieved from the filename (see L{decompose_file}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string - @raise InvenioWebSubmitFileError: in case of error. + @raise InvenioBibDocFileError: in case of error. 
""" try: latestVersion = self.get_latest_version() if latestVersion == 0: myversion = 1 else: myversion = latestVersion + 1 if os.path.exists(filename): if not os.path.getsize(filename) > 0: - raise InvenioWebSubmitFileError, "%s seems to be empty" % filename + raise InvenioBibDocFileError, "%s seems to be empty" % filename if format is None: format = decompose_file(filename)[2] else: format = normalize_format(format) destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, myversion) if run_sql("SELECT id_bibdoc FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, myversion, format)): - raise InvenioWebSubmitFileError("According to the database a file of format %s is already attached to the docid %s" % (format, self.id)) + raise InvenioBibDocFileError("According to the database a file of format %s is already attached to the docid %s" % (format, self.id)) try: shutil.copyfile(filename, destination) os.chmod(destination, 0644) if modification_date: # if the modification time of the file needs to be changed update_modification_date_of_file(destination, modification_date) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) + raise InvenioBibDocFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) self.more_info.set_description(description, format, myversion) self.more_info.set_comment(comment, format, myversion) if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') for flag in flags: if flag == 'PERFORM_HIDE_PREVIOUS': for afile in self.list_all_files(): format = afile.get_format() version = afile.get_version() if version < myversion: self.more_info.set_flag('HIDDEN', format, myversion) else: self.more_info.set_flag(flag, format, myversion) else: - raise InvenioWebSubmitFileError, "'%s' does not 
exists!" % filename + raise InvenioBibDocFileError, "'%s' does not exists!" % filename finally: self.touch() Md5Folder(self.basedir).update() self._build_file_list() just_added_file = self.get_file(format, myversion) run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, true, %s, %s, %s, %s, %s)", (self.id, myversion, format, just_added_file.cd, just_added_file.md, just_added_file.get_checksum(), just_added_file.get_size(), just_added_file.mime)) run_sql("UPDATE bibdocfsinfo SET last_version=false WHERE id_bibdoc=%s AND version<%s", (self.id, myversion)) def add_file_new_format(self, filename, version=None, description=None, comment=None, format=None, flags=None, modification_date=None): """ Add a file as a new format. @param filename: the local path of the file. @type filename: string @param version: an optional specific version to which the new format should be added. If None, the last version will be used. @type version: integer @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be retrieved from the filename (see L{decompose_file}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string - @raise InvenioWebSubmitFileError: if the given format already exists. + @raise InvenioBibDocFileError: if the given format already exists. 
""" try: if version is None: version = self.get_latest_version() if version == 0: version = 1 if os.path.exists(filename): if not os.path.getsize(filename) > 0: - raise InvenioWebSubmitFileError, "%s seems to be empty" % filename + raise InvenioBibDocFileError, "%s seems to be empty" % filename if format is None: format = decompose_file(filename)[2] else: format = normalize_format(format) if run_sql("SELECT id_bibdoc FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, version, format)): - raise InvenioWebSubmitFileError("According to the database a file of format %s is already attached to the docid %s" % (format, self.id)) + raise InvenioBibDocFileError("According to the database a file of format %s is already attached to the docid %s" % (format, self.id)) destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, version) if os.path.exists(destination): - raise InvenioWebSubmitFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, format) + raise InvenioBibDocFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, format) try: shutil.copyfile(filename, destination) os.chmod(destination, 0644) if modification_date: # if the modification time of the file needs to be changed update_modification_date_of_file(destination, modification_date) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) + raise InvenioBibDocFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) self.more_info.set_comment(comment, format, version) self.more_info.set_description(description, format, version) if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') for flag in flags: if flag != 
'PERFORM_HIDE_PREVIOUS': self.more_info.set_flag(flag, format, version) else: - raise InvenioWebSubmitFileError, "'%s' does not exists!" % filename + raise InvenioBibDocFileError, "'%s' does not exists!" % filename finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list() just_added_file = self.get_file(format, version) run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, true, %s, %s, %s, %s, %s)", (self.id, version, format, just_added_file.cd, just_added_file.md, just_added_file.get_checksum(), just_added_file.get_size(), just_added_file.mime)) def purge(self): """ Physically removes all the previous version of the given bibdoc. Everything but the last formats will be erased. """ version = self.get_latest_version() if version > 1: for afile in self.docfiles: if afile.get_version() < version: self.more_info.unset_comment(afile.get_format(), afile.get_version()) self.more_info.unset_description(afile.get_format(), afile.get_version()) for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS: self.more_info.unset_flag(flag, afile.get_format(), afile.get_version()) try: os.remove(afile.get_full_path()) except Exception, e: register_exception() Md5Folder(self.basedir).update() self.touch() self._build_file_list() run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version<%s", (self.id, version)) def expunge(self): """ Physically remove all the traces of a given document. @note: an expunged BibDoc object shouldn't be used anymore or the result might be unpredicted. 
""" del self.md5s del self.more_info os.system('rm -rf %s' % escape_shell_arg(self.basedir)) run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, )) run_sql('DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s OR id_bibdoc2=%s', (self.id, self.id)) run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, )) run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, doctimestamp) VALUES("EXPUNGE", %s, %s, NOW())', (self.id, self.docname)) run_sql('DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s', (self.id, )) del self.docfiles del self.id del self.cd del self.md del self.td del self.basedir del self.recid del self.doctype del self.docname def revert(self, version): """ Revert the document to a given version. All the formats corresponding to that version are copied forward to a new version. @param version: the version to revert to. @type version: integer - @raise InvenioWebSubmitFileError: in case of errors + @raise InvenioBibDocFileError: in case of errors """ version = int(version) docfiles = self.list_version_files(version) if docfiles: self.add_file_new_version(docfiles[0].get_full_path(), description=docfiles[0].get_description(), comment=docfiles[0].get_comment(), format=docfiles[0].get_format(), flags=docfiles[0].flags) for docfile in docfiles[1:]: self.add_file_new_format(docfile.filename, description=docfile.get_description(), comment=docfile.get_comment(), format=docfile.get_format(), flags=docfile.flags) def import_descriptions_and_comments_from_marc(self, record=None): """ Import descriptions and comments from the corresponding MARC metadata. @param record: the record (if None it will be calculated). @type record: bibrecord recstruct @note: If record is passed it is directly used, otherwise it is retrieved from the MARCXML stored in the database. 
""" ## Let's get the record from invenio.search_engine import get_record if record is None: record = get_record(self.id) fields = record_get_field_instances(record, '856', '4', ' ') global_comment = None global_description = None local_comment = {} local_description = {} for field in fields: url = field_get_subfield_values(field, 'u') if url: ## Given a url url = url[0] if url == '%s/%s/%s/files/' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid): ## If it is a traditional /CFG_SITE_RECORD/1/files/ one ## We have global description/comment for all the formats description = field_get_subfield_values(field, 'y') if description: global_description = description[0] comment = field_get_subfield_values(field, 'z') if comment: global_comment = comment[0] elif bibdocfile_url_p(url): ## Otherwise we have description/comment per format dummy, docname, format = decompose_bibdocfile_url(url) if docname == self.docname: description = field_get_subfield_values(field, 'y') if description: local_description[format] = description[0] comment = field_get_subfield_values(field, 'z') if comment: local_comment[format] = comment[0] ## Let's update the tables version = self.get_latest_version() for docfile in self.list_latest_files(): format = docfile.get_format() if format in local_comment: self.set_comment(local_comment[format], format, version) else: self.set_comment(global_comment, format, version) if format in local_description: self.set_description(local_description[format], format, version) else: self.set_description(global_description, format, version) self._build_file_list('init') - def get_icon(self, subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE, display_hidden=True): + def get_icon(self, subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE, display_hidden=True): """ @param subformat_re: by default the convention is that - L{CFG_WEBSUBMIT_ICON_SUBFORMAT_RE} is used as a subformat indicator to + L{CFG_BIBDOCFILE_ICON_SUBFORMAT_RE} is used as a subformat indicator to mean that a particular 
format is to be used as an icon. Specifiy a different subformat if you need to use a different convention. @type subformat_re: compiled regular expression @return: the bibdocfile corresponding to the icon of this document, or None if any icon exists for this document. @rtype: BibDocFile @warning: before I{subformat} were introduced this method was returning a BibDoc, while now is returning a BibDocFile. Check if your client code is compatible with this. """ for docfile in self.list_latest_files(list_hidden=display_hidden): if subformat_re.match(docfile.get_subformat()): return docfile return None - def add_icon(self, filename, format=None, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT, modification_date=None): + def add_icon(self, filename, format=None, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT, modification_date=None): """ Attaches icon to this document. @param filename: the local filesystem path to the icon. @type filename: string @param format: an optional format for the icon. If not specified it will be calculated after the filesystem path. @type format: string @param subformat: by default the convention is that - CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT is used as a subformat indicator to + CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT is used as a subformat indicator to mean that a particular format is to be used as an icon. Specifiy a different subformat if you need to use a different convention. @type subformat: string - @raise InvenioWebSubmitFileError: in case of errors. + @raise InvenioBibDocFileError: in case of errors. 
""" #first check if an icon already exists if not format: format = decompose_file(filename)[2] if subformat: format += ";%s" % subformat self.add_file_new_format(filename, format=format, modification_date=modification_date) - def delete_icon(self, subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE): + def delete_icon(self, subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE): """ @param subformat_re: by default the convention is that - L{CFG_WEBSUBMIT_ICON_SUBFORMAT_RE} is used as a subformat indicator to + L{CFG_BIBDOCFILE_ICON_SUBFORMAT_RE} is used as a subformat indicator to mean that a particular format is to be used as an icon. Specifiy a different subformat if you need to use a different convention. @type subformat: compiled regular expression Removes the icon attached to the document if it exists. """ for docfile in self.list_latest_files(): if subformat_re.match(docfile.get_subformat()): self.delete_file(docfile.get_format(), docfile.get_version()) def display(self, version="", ln=CFG_SITE_LANG, display_hidden=True): """ Returns an HTML representation of the this document. @param version: if not set, only the last version will be displayed. If 'all', all versions will be displayed. @type version: string (integer or 'all') @param ln: the language code. @type ln: string @param display_hidden: whether to include hidden files as well. @type display_hidden: bool @return: the formatted representation. 
@rtype: HTML string """ t = "" if version == "all": docfiles = self.list_all_files(list_hidden=display_hidden) elif version != "": version = int(version) docfiles = self.list_version_files(version, list_hidden=display_hidden) else: docfiles = self.list_latest_files(list_hidden=display_hidden) icon = self.get_icon(display_hidden=display_hidden) if icon: imageurl = icon.get_url() else: imageurl = "%s/img/smallfiles.gif" % CFG_SITE_URL versions = [] for version in list_versions_from_array(docfiles): currversion = { 'version' : version, 'previous' : 0, 'content' : [] } if version == self.get_latest_version() and version != 1: currversion['previous'] = 1 for docfile in docfiles: if docfile.get_version() == version: currversion['content'].append(docfile.display(ln = ln)) versions.append(currversion) if versions: - return websubmit_templates.tmpl_bibdoc_filelist( + return bibdocfile_templates.tmpl_bibdoc_filelist( ln = ln, versions = versions, imageurl = imageurl, docname = self.docname, recid = self.recid, status = self.status ) else: return "" def change_name(self, newname): """ Renames this document name. @param newname: the new name. @type newname: string - @raise InvenioWebSubmitFileError: if the new name corresponds to + @raise InvenioBibDocFileError: if the new name corresponds to a document already attached to the record owning this document. 
""" try: newname = normalize_docname(newname) res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (self.recid, newname)) if res: - raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid) + raise InvenioBibDocFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid) try: for f in os.listdir(self.basedir): if not f.startswith('.'): try: (dummy, base, extension, version) = decompose_file_with_version(f) except ValueError: register_exception(alert_admin=True, prefix="Strange file '%s' is stored in %s" % (f, self.basedir)) else: shutil.move(os.path.join(self.basedir, f), os.path.join(self.basedir, '%s%s;%i' % (newname, extension, version))) except Exception, e: register_exception() - raise InvenioWebSubmitFileError("Error in renaming the bibdoc %s to %s for recid %s: %s" % (self.docname, newname, self.recid, e)) + raise InvenioBibDocFileError("Error in renaming the bibdoc %s to %s for recid %s: %s" % (self.docname, newname, self.recid, e)) run_sql("update bibdoc set docname=%s where id=%s", (newname, self.id,)) self.docname = newname finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list('rename') def set_comment(self, comment, format, version=None): """ Updates the comment of a specific format/version of the document. @param comment: the new comment. @type comment: string @param format: the specific format for which the comment should be updated. @type format: string @param version: the specific version for which the comment should be updated. If not specified the last version will be used. 
@type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.set_comment(comment, format, version) self.touch() self._build_file_list('init') def set_description(self, description, format, version=None): """ Updates the description of a specific format/version of the document. @param description: the new description. @type description: string @param format: the specific format for which the description should be updated. @type format: string @param version: the specific version for which the description should be updated. If not specified the last version will be used. @type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.set_description(description, format, version) self.touch() self._build_file_list('init') def set_flag(self, flagname, format, version=None): """ Sets a flag for a specific format/version of the document. @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}. @type flagname: string @param format: the specific format for which the flag should be set. @type format: string @param version: the specific version for which the flag should be set. If not specified the last version will be used. @type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.set_flag(flagname, format, version) self.touch() self._build_file_list('init') def has_flag(self, flagname, format, version=None): """ Checks if a particular flag for a format/version is set. @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}. @type flagname: string @param format: the specific format for which the flag should be set. @type format: string @param version: the specific version for which the flag should be set. If not specified the last version will be used. @type version: integer @return: True if the flag is set. 
@rtype: bool """ if version is None: version = self.get_latest_version() format = normalize_format(format) return self.more_info.has_flag(flagname, format, version) def unset_flag(self, flagname, format, version=None): """ Unsets a flag for a specific format/version of the document. @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}. @type flagname: string @param format: the specific format for which the flag should be unset. @type format: string @param version: the specific version for which the flag should be unset. If not specified the last version will be used. @type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.unset_flag(flagname, format, version) self.touch() self._build_file_list('init') def get_comment(self, format, version=None): """ Retrieve the comment of a specific format/version of the document. @param format: the specific format for which the comment should be retrieved. @type format: string @param version: the specific version for which the comment should be retrieved. If not specified the last version will be used. @type version: integer @return: the comment. @rtype: string """ if version is None: version = self.get_latest_version() format = normalize_format(format) return self.more_info.get_comment(format, version) def get_description(self, format, version=None): """ Retrieve the description of a specific format/version of the document. @param format: the specific format for which the description should be retrieved. @type format: string @param version: the specific version for which the description should be retrieved. If not specified the last version will be used. @type version: integer @return: the description. 
@rtype: string """ if version is None: version = self.get_latest_version() format = normalize_format(format) return self.more_info.get_description(format, version) def hidden_p(self, format, version=None): """ Returns True if the file specified by the given format/version is hidden. @param format: the specific format for which the description should be retrieved. @type format: string @param version: the specific version for which the description should be retrieved. If not specified the last version will be used. @type version: integer @return: True if hidden. @rtype: bool """ if version is None: version = self.get_latest_version() return self.more_info.has_flag('HIDDEN', format, version) def get_docname(self): """ @return: the name of this document. @rtype: string """ return self.docname def get_base_dir(self): """ @return: the base directory on the local filesystem for this document (e.g. C{/soft/cdsweb/var/data/files/g0/123}) @rtype: string """ return self.basedir def get_type(self): """ @return: the type of this document. @rtype: string""" return self.doctype def get_recid(self): """ @return: the record id of the record to which this document is attached. @rtype: integer """ return self.recid def get_id(self): """ @return: the id of this document. @rtype: integer """ return self.id def pdf_a_p(self): """ @return: True if this document contains a PDF in PDF/A format. @rtype: bool""" return self.has_flag('PDF/A', 'pdf') def has_text(self, require_up_to_date=False, version=None): """ Return True if the text of this document has already been extracted. @param require_up_to_date: if True check the text was actually extracted after the most recent format of the given version. @type require_up_to_date: bool @param version: a version for which the text should have been extracted. If not specified the latest version is considered. @type version: integer @return: True if the text has already been extracted. 
@rtype: bool """ if version is None: version = self.get_latest_version() if os.path.exists(os.path.join(self.basedir, '.text;%i' % version)): if not require_up_to_date: return True else: docfiles = self.list_version_files(version) text_md = datetime.fromtimestamp(os.path.getmtime(os.path.join(self.basedir, '.text;%i' % version))) for docfile in docfiles: if text_md <= docfile.md: return False return True return False def get_file(self, format, version=""): """ Returns a L{BibDocFile} instance of this document corresponding to the specific format and version. @param format: the specific format. @type format: string @param version: the specific version for which the description should be retrieved. If not specified the last version will be used. @type version: integer @return: the L{BibDocFile} instance. @rtype: BibDocFile """ if version == "": docfiles = self.list_latest_files() else: version = int(version) docfiles = self.list_version_files(version) format = normalize_format(format) for docfile in docfiles: if (docfile.get_format()==format or not format): return docfile ## Let's skip the subformat specification and consider just the ## superformat superformat = get_superformat_from_format(format) for docfile in docfiles: if get_superformat_from_format(docfile.get_format()) == superformat: return docfile - raise InvenioWebSubmitFileError, "No file called '%s' of format '%s', version '%s'" % (self.docname, format, version) + raise InvenioBibDocFileError, "No file called '%s' of format '%s', version '%s'" % (self.docname, format, version) def list_versions(self): """ @return: the list of existing version numbers for this document. @rtype: list of integer """ versions = [] for docfile in self.docfiles: if not docfile.get_version() in versions: versions.append(docfile.get_version()) versions.sort() return versions def delete(self): """ Delete this document. @see: L{undelete} for how to undelete the document. - @raise InvenioWebSubmitFileError: in case of errors. 
+ @raise InvenioBibDocFileError: in case of errors. """ try: today = datetime.today() self.change_name('DELETED-%s%s-%s' % (today.strftime('%Y%m%d%H%M%S'), today.microsecond, self.docname)) run_sql("UPDATE bibdoc SET status='DELETED' WHERE id=%s", (self.id,)) self.status = 'DELETED' except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "It's impossible to delete bibdoc %s: %s" % (self.id, e) + raise InvenioBibDocFileError, "It's impossible to delete bibdoc %s: %s" % (self.id, e) def deleted_p(self): """ @return: True if this document has been deleted. @rtype: bool """ return self.status == 'DELETED' def empty_p(self): """ @return: True if this document is empty, i.e. it has no bibdocfile connected. @rtype: bool """ return len(self.docfiles) == 0 def undelete(self, previous_status=''): """ Undelete a deleted file (only if it was actually deleted via L{delete}). The previous C{status}, i.e. the restriction key can be provided. Otherwise the undeleted document will be public. @param previous_status: the previous status the should be restored. @type previous_status: string - @raise InvenioWebSubmitFileError: in case of any error. + @raise InvenioBibDocFileError: in case of any error. """ bibrecdocs = BibRecDocs(self.recid) try: run_sql("UPDATE bibdoc SET status=%s WHERE id=%s AND status='DELETED'", (previous_status, self.id)) except Exception, e: - raise InvenioWebSubmitFileError, "It's impossible to undelete bibdoc %s: %s" % (self.id, e) + raise InvenioBibDocFileError, "It's impossible to undelete bibdoc %s: %s" % (self.id, e) if self.docname.startswith('DELETED-'): try: # Let's remove DELETED-20080214144322- in front of the docname original_name = '-'.join(self.docname.split('-')[2:]) original_name = bibrecdocs.propose_unique_docname(original_name) self.change_name(original_name) except Exception, e: - raise InvenioWebSubmitFileError, "It's impossible to restore the previous docname %s. 
%s kept as docname because: %s" % (original_name, self.docname, e) + raise InvenioBibDocFileError, "It's impossible to restore the previous docname %s. %s kept as docname because: %s" % (original_name, self.docname, e) else: - raise InvenioWebSubmitFileError, "Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % self.docname + raise InvenioBibDocFileError, "Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % self.docname def delete_file(self, format, version): """ Delete a specific format/version of this document on the filesystem. @param format: the particular format to be deleted. @type format: string @param version: the particular version to be deleted. @type version: integer @note: this operation is not reversible!""" try: afile = self.get_file(format, version) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: return try: os.remove(afile.get_full_path()) run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, afile.get_version(), afile.get_format())) last_version = run_sql("SELECT max(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id, ))[0][0] if last_version: ## Updating information about last version run_sql("UPDATE bibdocfsinfo SET last_version=true WHERE id_bibdoc=%s AND version=%s", (self.id, last_version)) run_sql("UPDATE bibdocfsinfo SET last_version=false WHERE id_bibdoc=%s AND version<>%s", (self.id, last_version)) except OSError: pass self.touch() self._build_file_list() def get_history(self): """ @return: a human readable and parsable string that represent the history of this document. 
@rtype: string """ ret = [] hst = run_sql("""SELECT action, docname, docformat, docversion, docsize, docchecksum, doctimestamp FROM hstDOCUMENT WHERE id_bibdoc=%s ORDER BY doctimestamp ASC""", (self.id, )) for row in hst: ret.append("%s %s '%s', format: '%s', version: %i, size: %s, checksum: '%s'" % (row[6].strftime('%Y-%m-%d %H:%M:%S'), row[0], row[1], row[2], row[3], nice_size(row[4]), row[5])) return ret def _build_file_list(self, context=''): """ Lists all files attached to the bibdoc. This function should be called everytime the bibdoc is modified. As a side effect it log everything that has happened to the bibdocfiles in the log facility, according to the context: "init": means that the function has been called; for the first time by a constructor, hence no logging is performed "": by default means to log every deleted file as deleted and every added file as added; "rename": means that every appearently deleted file is logged as renamef and every new file as renamet. """ def log_action(action, docid, docname, format, version, size, checksum, timestamp=''): """Log an action into the bibdoclog table.""" try: if timestamp: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)', (action, docid, docname, format, version, size, checksum, timestamp)) else: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, NOW())', (action, docid, docname, format, version, size, checksum)) except DatabaseError: register_exception() def make_removed_added_bibdocfiles(previous_file_list): """Internal function for build the log of changed files.""" # Let's rebuild the previous situation old_files = {} for bibdocfile in previous_file_list: old_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # 
Let's rebuild the new situation new_files = {} for bibdocfile in self.docfiles: new_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's subtract from added file all the files that are present in # the old list, and let's add to deleted files that are not present # added file. added_files = dict(new_files) deleted_files = {} for key, value in old_files.iteritems(): if added_files.has_key(key): del added_files[key] else: deleted_files[key] = value return (added_files, deleted_files) if context != ('init', 'init_from_disk'): previous_file_list = list(self.docfiles) res = run_sql("SELECT status,docname,creation_date," "modification_date,more_info FROM bibdoc WHERE id=%s", (self.id,)) self.cd = res[0][2] self.md = res[0][3] self.docname = res[0][1] self.status = res[0][0] self.more_info = BibDocMoreInfo(self.id, blob_to_string(res[0][4])) self.docfiles = [] if CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE and context == 'init': ## In normal init context we read from DB res = run_sql("SELECT version, format, cd, md, checksum, filesize FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id, )) for version, format, cd, md, checksum, size in res: self.docfiles.append(BibDocFile( os.path.join(self.basedir, self.docname + format + ";%s" % version), self.doctype, version, self.docname, format, self.recid, self.id, self.status, checksum, self.more_info, human_readable=self.human_readable, cd=cd, md=md, size=size)) else: if os.path.exists(self.basedir): self.md5s = Md5Folder(self.basedir) files = os.listdir(self.basedir) files.sort() for afile in files: if not afile.startswith('.'): try: filepath = os.path.join(self.basedir, afile) dirname, basename, format, fileversion = decompose_file_with_version(filepath) checksum = self.md5s.get_checksum(afile) # we can append file: self.docfiles.append(BibDocFile(filepath, self.doctype, fileversion, basename, format, self.recid, self.id, self.status, checksum, self.more_info, 
human_readable=self.human_readable)) except Exception, e: register_exception() if context in ('init', 'init_from_disk'): return else: added_files, deleted_files = make_removed_added_bibdocfiles(previous_file_list) deletedstr = "DELETED" addedstr = "ADDED" if context == 'rename': deletedstr = "RENAMEDFROM" addedstr = "RENAMEDTO" for (docname, format, version), (size, checksum, md) in added_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(addedstr, self.id, docname, format, version, size, checksum, md) for (docname, format, version), (size, checksum, md) in deleted_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(deletedstr, self.id, docname, format, version, size, checksum, md) def _sync_to_db(self): """ Update the content of the bibdocfile table by taking what is available on the filesystem. """ self._build_file_list('init_from_disk') run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id,)) for afile in self.docfiles: run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, false, %s, %s, %s, %s, %s)", (self.id, afile.get_version(), afile.get_format(), afile.cd, afile.md, afile.get_checksum(), afile.get_size(), afile.mime)) run_sql("UPDATE bibdocfsinfo SET last_version=true WHERE id_bibdoc=%s AND version=%s", (self.id, self.get_latest_version())) def get_total_size_latest_version(self): """Return the total size used on disk of all the files belonging to this bibdoc and corresponding to the latest version.""" ret = 0 for bibdocfile in self.list_latest_files(): ret += bibdocfile.get_size() return ret def get_total_size(self): """Return the total size used on disk of all the files belonging to this bibdoc.""" ret = 0 for bibdocfile in self.list_all_files(): ret += bibdocfile.get_size() return ret def list_all_files(self, list_hidden=True): """Returns all the docfiles linked with the given bibdoc.""" if list_hidden: 
return self.docfiles else: return [afile for afile in self.docfiles if not afile.hidden_p()] def list_latest_files(self, list_hidden=True): """Returns all the docfiles within the last version.""" return self.list_version_files(self.get_latest_version(), list_hidden=list_hidden) def list_version_files(self, version, list_hidden=True): """Return all the docfiles of a particular version.""" version = int(version) return [docfile for docfile in self.docfiles if docfile.get_version() == version and (list_hidden or not docfile.hidden_p())] def check_file_exists(self, path, format): """ Check if a file with the same content of the file pointed in C{path} is already attached to this record. @param path: the file to be checked against. @type path: string @return: True if a file with the requested content is already attached to the record. @rtype: bool """ # Let's consider all the latest files for afile in self.list_latest_files(): if afile.is_identical_to(path, format): return True return False def get_latest_version(self): """ Returns the latest existing version number for the given bibdoc. If no file is associated to this bibdoc, returns '0'. """ version = 0 for bibdocfile in self.docfiles: if bibdocfile.get_version() > version: version = bibdocfile.get_version() return version def get_file_number(self): """Return the total number of files.""" return len(self.docfiles) def register_download(self, ip_address, version, format, userid=0): """Register the information about a download of a particular file.""" format = normalize_format(format) if format[:1] == '.': format = format[1:] format = format.upper() return run_sql("INSERT DELAYED INTO rnkDOWNLOADS " "(id_bibrec,id_bibdoc,file_version,file_format," "id_user,client_host,download_time) VALUES " "(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())", (self.recid, self.id, version, format, userid, ip_address,)) def generic_path2bidocfile(fullpath): """ Returns a BibDocFile objects that wraps the given fullpath. 
@note: the object will contain the minimum information that can be guessed from the fullpath (e.g. docname, format, subformat, version, md5, creation_date, modification_date). It won't contain for example a comment, a description, a doctype, a restriction. """ fullpath = os.path.abspath(fullpath) try: path, name, format, version = decompose_file_with_version(fullpath) except ValueError: ## There is no version version = 0 path, name, format = decompose_file(fullpath) md5folder = Md5Folder(path) checksum = md5folder.get_checksum(os.path.basename(fullpath)) return BibDocFile(fullpath=fullpath, doctype=None, version=version, name=name, format=format, recid=0, docid=0, status=None, checksum=checksum, more_info=None) class BibDocFile: """This class represents a physical file in the Invenio filesystem. It should never be instantiated directly""" def __init__(self, fullpath, doctype, version, name, format, recid, docid, status, checksum, more_info=None, human_readable=False, cd=None, md=None, size=None): self.fullpath = os.path.abspath(fullpath) self.doctype = doctype self.docid = docid self.recid = recid self.version = version self.status = status self.checksum = checksum self.human_readable = human_readable if more_info: self.description = more_info.get_description(format, version) self.comment = more_info.get_comment(format, version) self.flags = more_info.get_flags(format, version) else: self.description = None self.comment = None self.flags = [] self.format = normalize_format(format) self.superformat = get_superformat_from_format(self.format) self.subformat = get_subformat_from_format(self.format) self.fullname = name if format: self.fullname += self.superformat self.mime, self.encoding = _mimes.guess_type(self.fullname) if self.mime is None: self.mime = "application/octet-stream" self.more_info = more_info self.hidden = 'HIDDEN' in self.flags self.size = size or os.path.getsize(fullpath) self.md = md or datetime.fromtimestamp(os.path.getmtime(fullpath)) try: self.cd 
= cd or datetime.fromtimestamp(os.path.getctime(fullpath)) except OSError: self.cd = self.md self.name = name self.dir = os.path.dirname(fullpath) if self.subformat: self.url = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {'subformat' : self.subformat}) self.fullurl = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {'subformat' : self.subformat, 'version' : self.version}) else: self.url = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {}) self.fullurl = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {'version' : self.version}) self.etag = '"%i%s%i"' % (self.docid, self.format, self.version) self.magic = None def __repr__(self): return ('BibDocFile(%s, %s, %i, %s, %s, %i, %i, %s, %s, %s, %s)' % (repr(self.fullpath), repr(self.doctype), self.version, repr(self.name), repr(self.format), self.recid, self.docid, repr(self.status), repr(self.checksum), repr(self.more_info), repr(self.human_readable))) def __str__(self): out = '%s:%s:%s:%s:fullpath=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullpath) out += '%s:%s:%s:%s:fullname=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullname) out += '%s:%s:%s:%s:name=%s\n' % (self.recid, self.docid, self.version, self.format, self.name) out += '%s:%s:%s:%s:subformat=%s\n' % (self.recid, self.docid, self.version, self.format, get_subformat_from_format(self.format)) out += '%s:%s:%s:%s:status=%s\n' % (self.recid, self.docid, self.version, self.format, self.status) out += '%s:%s:%s:%s:checksum=%s\n' % (self.recid, self.docid, self.version, self.format, self.checksum) if self.human_readable: out += '%s:%s:%s:%s:size=%s\n' % (self.recid, self.docid, self.version, self.format, nice_size(self.size)) else: out += '%s:%s:%s:%s:size=%s\n' % (self.recid, 
self.docid, self.version, self.format, self.size) out += '%s:%s:%s:%s:creation time=%s\n' % (self.recid, self.docid, self.version, self.format, self.cd) out += '%s:%s:%s:%s:modification time=%s\n' % (self.recid, self.docid, self.version, self.format, self.md) out += '%s:%s:%s:%s:magic=%s\n' % (self.recid, self.docid, self.version, self.format, self.get_magic()) out += '%s:%s:%s:%s:mime=%s\n' % (self.recid, self.docid, self.version, self.format, self.mime) out += '%s:%s:%s:%s:encoding=%s\n' % (self.recid, self.docid, self.version, self.format, self.encoding) out += '%s:%s:%s:%s:url=%s\n' % (self.recid, self.docid, self.version, self.format, self.url) out += '%s:%s:%s:%s:fullurl=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullurl) out += '%s:%s:%s:%s:description=%s\n' % (self.recid, self.docid, self.version, self.format, self.description) out += '%s:%s:%s:%s:comment=%s\n' % (self.recid, self.docid, self.version, self.format, self.comment) out += '%s:%s:%s:%s:hidden=%s\n' % (self.recid, self.docid, self.version, self.format, self.hidden) out += '%s:%s:%s:%s:flags=%s\n' % (self.recid, self.docid, self.version, self.format, self.flags) out += '%s:%s:%s:%s:etag=%s\n' % (self.recid, self.docid, self.version, self.format, self.etag) return out def display(self, ln = CFG_SITE_LANG): """Returns a formatted representation of this docfile.""" - return websubmit_templates.tmpl_bibdocfile_filelist( + return bibdocfile_templates.tmpl_bibdocfile_filelist( ln = ln, recid = self.recid, version = self.version, md = self.md, name = self.name, superformat = self.superformat, subformat = self.subformat, nice_size = nice_size(self.size), description = self.description or '' ) def is_identical_to(self, path, format): """ @path: the path of another file on disk. @return: True if L{path} is contains bitwise the same content. 
""" if self.format != format: return False if os.path.getsize(path) != self.size: return False if calculate_md5(path) != self.checksum: return False return filecmp.cmp(self.get_full_path(), path) def is_restricted(self, user_info): """Returns restriction state. (see acc_authorize_action return values)""" if self.status not in ('', 'DELETED'): return check_bibdoc_authorization(user_info, status=self.status) elif self.status == 'DELETED': return (1, 'File has ben deleted') else: return (0, '') - def is_icon(self, subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE): + def is_icon(self, subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE): """ @param subformat_re: by default the convention is that - L{CFG_WEBSUBMIT_ICON_SUBFORMAT_RE} is used as a subformat indicator to + L{CFG_BIBDOCFILE_ICON_SUBFORMAT_RE} is used as a subformat indicator to mean that a particular format is to be used as an icon. Specifiy a different subformat if you need to use a different convention. @type subformat: compiled regular expression @return: True if this file is an icon. 
@rtype: bool """ return bool(subformat_re.match(self.subformat)) def hidden_p(self): return self.hidden def get_url(self): return self.url def get_type(self): return self.doctype def get_path(self): return self.fullpath def get_bibdocid(self): return self.docid def get_name(self): return self.name def get_full_name(self): return self.fullname def get_full_path(self): return self.fullpath def get_format(self): return self.format def get_subformat(self): return self.subformat def get_superformat(self): return self.superformat def get_size(self): return self.size def get_version(self): return self.version def get_checksum(self): return self.checksum def get_description(self): return self.description def get_comment(self): return self.comment def get_content(self): """Returns the binary content of the file.""" content_fd = open(self.fullpath, 'rb') content = content_fd.read() content_fd.close() return content def get_recid(self): """Returns the recid connected with the bibdoc of this file.""" return self.recid def get_status(self): """Returns the status of the file, i.e. either '', 'DELETED' or a restriction keyword.""" return self.status def get_magic(self): """Return all the possible guesses from the magic library about the content of the file.""" if self.magic is None and CFG_HAS_MAGIC: magic_cookies = _get_magic_cookies() magic_result = [] for key in magic_cookies.keys(): magic_result.append(magic_cookies[key].file(self.fullpath)) self.magic = tuple(magic_result) return self.magic def check(self): """Return True if the checksum corresponds to the file.""" return calculate_md5(self.fullpath) == self.checksum def stream(self, req, download=False): """Stream the file. 
Note that no restriction check is being done here, since restrictions have been checked previously inside websubmit_webinterface.py.""" if os.path.exists(self.fullpath): if random.random() < CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY and calculate_md5(self.fullpath) != self.checksum: - raise InvenioWebSubmitFileError, "File %s, version %i, for record %s is corrupted!" % (self.fullname, self.version, self.recid) + raise InvenioBibDocFileError, "File %s, version %i, for record %s is corrupted!" % (self.fullname, self.version, self.recid) stream_file(req, self.fullpath, "%s%s" % (self.name, self.superformat), self.mime, self.encoding, self.etag, self.checksum, self.fullurl, download=download) raise apache.SERVER_RETURN, apache.DONE else: req.status = apache.HTTP_NOT_FOUND - raise InvenioWebSubmitFileError, "%s does not exists!" % self.fullpath + raise InvenioBibDocFileError, "%s does not exists!" % self.fullpath _RE_STATUS_PARSER = re.compile(r'^(?Pemail|group|egroup|role|firerole|status):\s*(?P.*)$', re.S + re.I) def check_bibdoc_authorization(user_info, status): """ Check if the user is authorized to access a document protected with the given status. 
L{status} is a string of the form:: auth_type: auth_value where C{auth_type} can have values in:: email, group, role, firerole, status and C{auth_value} has a value interpreted againsta C{auth_type}: - C{email}: the user can access the document if his/her email matches C{auth_value} - C{group}: the user can access the document if one of the groups (local or external) of which he/she is member matches C{auth_value} - C{role}: the user can access the document if he/she belongs to the WebAccess role specified in C{auth_value} - C{firerole}: the user can access the document if he/she is implicitly matched by the role described by the firewall like role definition in C{auth_value} - C{status}: the user can access the document if he/she is authorized to for the action C{viewrestrdoc} with C{status} paramter having value C{auth_value} @note: If no C{auth_type} is specified or if C{auth_type} is not one of the above, C{auth_value} will be set to the value contained in the parameter C{status}, and C{auth_type} will be considered to be C{status}. @param user_info: the user_info dictionary @type: dict @param status: the status of the document. @type status: string @return: a tuple, of the form C{(auth_code, auth_message)} where auth_code is 0 if the authorization is granted and greater than 0 otherwise. @rtype: (int, string) @raise ValueError: in case of unexpected parsing error. 
""" def parse_status(status): g = _RE_STATUS_PARSER.match(status) if g: return (g.group('type').lower(), g.group('value')) else: return ('status', status) if acc_is_user_in_role(user_info, acc_get_role_id(SUPERADMINROLE)): return (0, CFG_WEBACCESS_WARNING_MSGS[0]) auth_type, auth_value = parse_status(status) if auth_type == 'status': return acc_authorize_action(user_info, 'viewrestrdoc', status=auth_value) elif auth_type == 'email': if not auth_value.lower().strip() == user_info['email'].lower().strip(): return (1, 'You must be member of the group %s in order to access this document' % repr(auth_value)) elif auth_type == 'group': if not auth_value in user_info['group']: return (1, 'You must be member of the group %s in order to access this document' % repr(auth_value)) elif auth_type == 'role': if not acc_is_user_in_role(user_info, acc_get_role_id(auth_value)): return (1, 'You must be member in the role %s in order to access this document' % repr(auth_value)) elif auth_type == 'firerole': if not acc_firerole_check_user(user_info, compile_role_definition(auth_value)): return (1, 'You must be authorized in order to access this document') else: raise ValueError, 'Unexpected authorization type %s for %s' % (repr(auth_type), repr(auth_value)) return (0, CFG_WEBACCESS_WARNING_MSGS[0]) _RE_BAD_MSIE = re.compile("MSIE\s+(\d+\.\d+)") def stream_file(req, fullpath, fullname=None, mime=None, encoding=None, etag=None, md5=None, location=None, download=False): """This is a generic function to stream a file to the user. If fullname, mime, encoding, and location are not provided they will be guessed based on req and fullpath. md5 should be passed as an hexadecimal string. 
""" def normal_streaming(size): req.set_content_length(size) req.send_http_header() if not req.header_only: req.sendfile(fullpath) return "" def single_range(size, the_range): req.set_content_length(the_range[1]) req.headers_out['Content-Range'] = 'bytes %d-%d/%d' % (the_range[0], the_range[0] + the_range[1] - 1, size) req.status = apache.HTTP_PARTIAL_CONTENT req.send_http_header() if not req.header_only: req.sendfile(fullpath, the_range[0], the_range[1]) return "" def multiple_ranges(size, ranges, mime): req.status = apache.HTTP_PARTIAL_CONTENT boundary = '%s%04d' % (time.strftime('THIS_STRING_SEPARATES_%Y%m%d%H%M%S'), random.randint(0, 9999)) req.content_type = 'multipart/byteranges; boundary=%s' % boundary content_length = 0 for arange in ranges: content_length += len('--%s\r\n' % boundary) content_length += len('Content-Type: %s\r\n' % mime) content_length += len('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size)) content_length += len('\r\n') content_length += arange[1] content_length += len('\r\n') content_length += len('--%s--\r\n' % boundary) req.set_content_length(content_length) req.send_http_header() if not req.header_only: for arange in ranges: req.write('--%s\r\n' % boundary, 0) req.write('Content-Type: %s\r\n' % mime, 0) req.write('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size), 0) req.write('\r\n', 0) req.sendfile(fullpath, arange[0], arange[1]) req.write('\r\n', 0) req.write('--%s--\r\n' % boundary) req.flush() return "" def parse_date(date): """According to a date can come in three formats (in order of preference): Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format Moreover IE is adding some trailing information after a ';'. Wrong dates should be simpled ignored. 
This function return the time in seconds since the epoch GMT or None in case of errors.""" if not date: return None try: date = date.split(';')[0].strip() # Because of IE ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(time.strptime(date, '%a, %d %b %Y %X %Z')) except: try: ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(time.strptime(date, '%A, %d-%b-%y %H:%M:%S %Z')) except: try: ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(date) except: return None def parse_ranges(ranges): """According to a (multiple) range request comes in the form: bytes=20-30,40-60,70-,-80 with the meaning: from byte to 20 to 30 inclusive (11 bytes) from byte to 40 to 60 inclusive (21 bytes) from byte 70 to (size - 1) inclusive (size - 70 bytes) from byte size - 80 to (size - 1) inclusive (80 bytes) This function will return the list of ranges in the form: [[first_byte, last_byte], ...] If first_byte or last_byte aren't specified they'll be set to None If the list is not well formatted it will return None """ try: if ranges.startswith('bytes') and '=' in ranges: ranges = ranges.split('=')[1].strip() else: return None ret = [] for arange in ranges.split(','): arange = arange.strip() if arange.startswith('-'): ret.append([None, int(arange[1:])]) elif arange.endswith('-'): ret.append([int(arange[:-1]), None]) else: ret.append(map(int, arange.split('-'))) return ret except: return None def parse_tags(tags): """Return a list of tags starting from a comma separated list.""" return [tag.strip() for tag in tags.split(',')] def fix_ranges(ranges, size): """Complementary to parse_ranges it will transform all the ranges into (first_byte, length), adjusting all the value based on the actual size provided. 
""" ret = [] for arange in ranges: if (arange[0] is None and arange[1] > 0) or arange[0] < size: if arange[0] is None: arange[0] = size - arange[1] elif arange[1] is None: arange[1] = size - arange[0] else: arange[1] = arange[1] - arange[0] + 1 arange[0] = max(0, arange[0]) arange[1] = min(size - arange[0], arange[1]) if arange[1] > 0: ret.append(arange) return ret def get_normalized_headers(headers): """Strip and lowerize all the keys of the headers dictionary plus strip, lowerize and transform known headers value into their value.""" ret = { 'if-match' : None, 'unless-modified-since' : None, 'if-modified-since' : None, 'range' : None, 'if-range' : None, 'if-none-match' : None, } for key, value in req.headers_in.iteritems(): key = key.strip().lower() value = value.strip() if key in ('unless-modified-since', 'if-modified-since'): value = parse_date(value) elif key == 'range': value = parse_ranges(value) elif key == 'if-range': value = parse_date(value) or parse_tags(value) elif key in ('if-match', 'if-none-match'): value = parse_tags(value) if value: ret[key] = value return ret headers = get_normalized_headers(req.headers_in) g = _RE_BAD_MSIE.search(headers.get('user-agent', "MSIE 6.0")) bad_msie = g and float(g.group(1)) < 9.0 if CFG_BIBDOCFILE_USE_XSENDFILE: ## If XSendFile is supported by the server, let's use it. 
if os.path.exists(fullpath): if fullname is None: fullname = os.path.basename(fullpath) if bad_msie: ## IE is confused by quotes req.headers_out["Content-Disposition"] = 'attachment; filename=%s' % fullname.replace('"', '\\"') elif download: req.headers_out["Content-Disposition"] = 'attachment; filename="%s"' % fullname.replace('"', '\\"') else: ## IE is confused by inline req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"') req.headers_out["X-Sendfile"] = fullpath if mime is None: format = decompose_file(fullpath)[2] (mime, encoding) = _mimes.guess_type(fullpath) if mime is None: mime = "application/octet-stream" if not bad_msie: ## IE is confused by not supported mimetypes req.content_type = mime return "" else: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND if headers['if-match']: if etag is not None and etag not in headers['if-match']: raise apache.SERVER_RETURN, apache.HTTP_PRECONDITION_FAILED if os.path.exists(fullpath): mtime = os.path.getmtime(fullpath) if fullname is None: fullname = os.path.basename(fullpath) if mime is None: (mime, encoding) = _mimes.guess_type(fullpath) if mime is None: mime = "application/octet-stream" if location is None: location = req.uri if not bad_msie: ## IE is confused by not supported mimetypes req.content_type = mime req.encoding = encoding req.filename = fullname req.headers_out["Last-Modified"] = time.strftime('%a, %d %b %Y %X GMT', time.gmtime(mtime)) if CFG_ENABLE_HTTP_RANGE_REQUESTS: req.headers_out["Accept-Ranges"] = "bytes" else: req.headers_out["Accept-Ranges"] = "none" req.headers_out["Content-Location"] = location if etag is not None: req.headers_out["ETag"] = etag if md5 is not None: req.headers_out["Content-MD5"] = base64.encodestring(binascii.unhexlify(md5.upper()))[:-1] if bad_msie: ## IE is confused by quotes req.headers_out["Content-Disposition"] = 'attachment; filename=%s' % fullname.replace('"', '\\"') elif download: req.headers_out["Content-Disposition"] = 
'attachment; filename="%s"' % fullname.replace('"', '\\"') else: ## IE is confused by inline req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"') size = os.path.getsize(fullpath) if not size: try: raise Exception, '%s exists but is empty' % fullpath except Exception: register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND if headers['if-modified-since'] and headers['if-modified-since'] >= mtime: raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED if headers['if-none-match']: if etag is not None and etag in headers['if-none-match']: raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED if headers['unless-modified-since'] and headers['unless-modified-since'] < mtime: return normal_streaming(size) if CFG_ENABLE_HTTP_RANGE_REQUESTS and headers['range']: try: if headers['if-range']: if etag is None or etag not in headers['if-range']: return normal_streaming(size) ranges = fix_ranges(headers['range'], size) except: return normal_streaming(size) if len(ranges) > 1: return multiple_ranges(size, ranges, mime) elif ranges: return single_range(size, ranges[0]) else: raise apache.SERVER_RETURN, apache.HTTP_RANGE_NOT_SATISFIABLE else: return normal_streaming(size) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND def stream_restricted_icon(req): """Return the content of the "Restricted Icon" file.""" stream_file(req, '%s/img/restricted.gif' % CFG_WEBDIR) raise apache.SERVER_RETURN, apache.DONE def list_types_from_array(bibdocs): """Retrieves the list of types from the given bibdoc list.""" types = [] for bibdoc in bibdocs: if not bibdoc.get_type() in types: types.append(bibdoc.get_type()) types.sort() if 'Main' in types: ## Move 'Main' at the beginning types.remove('Main') types.insert(0, 'Main') return types def list_versions_from_array(docfiles): """Retrieve the list of existing versions from the given docfiles list.""" versions = [] for docfile in docfiles: if not docfile.get_version() 
in versions: versions.append(docfile.get_version()) versions.sort() versions.reverse() return versions def _make_base_dir(docid): """Given a docid it returns the complete path that should host its files.""" - group = "g" + str(int(int(docid) / CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT)) - return os.path.join(CFG_WEBSUBMIT_FILEDIR, group, str(docid)) + group = "g" + str(int(int(docid) / CFG_BIBDOCFILE_FILESYSTEM_BIBDOC_GROUP_LIMIT)) + return os.path.join(CFG_BIBDOCFILE_FILEDIR, group, str(docid)) class Md5Folder: """Manage all the Md5 checksum about a folder""" def __init__(self, folder): """Initialize the class from the md5 checksum of a given path""" self.folder = folder try: self.load() - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: self.md5s = {} self.update() def update(self, only_new = True): """Update the .md5 file with the current files. If only_new is specified then only not already calculated file are calculated.""" if not only_new: self.md5s = {} if os.path.exists(self.folder): for filename in os.listdir(self.folder): if filename not in self.md5s and not filename.startswith('.'): self.md5s[filename] = calculate_md5(os.path.join(self.folder, filename)) self.store() def store(self): """Store the current md5 dictionary into .md5""" try: old_umask = os.umask(022) md5file = open(os.path.join(self.folder, ".md5"), "w") for key, value in self.md5s.items(): md5file.write('%s *%s\n' % (value, key)) md5file.close() os.umask(old_umask) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while storing .md5 for folder '%s': '%s'" % (self.folder, e) + raise InvenioBibDocFileError, "Encountered an exception while storing .md5 for folder '%s': '%s'" % (self.folder, e) def load(self): """Load .md5 into the md5 dictionary""" self.md5s = {} try: md5file = open(os.path.join(self.folder, ".md5"), "r") for row in md5file: md5hash = row[:32] filename = row[34:].strip() self.md5s[filename] = md5hash 
md5file.close() except IOError: self.update() except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while loading .md5 for folder '%s': '%s'" % (self.folder, e) + raise InvenioBibDocFileError, "Encountered an exception while loading .md5 for folder '%s': '%s'" % (self.folder, e) def check(self, filename = ''): """Check the specified file or all the files for which it exists a hash for being coherent with the stored hash.""" if filename and filename in self.md5s.keys(): try: return self.md5s[filename] == calculate_md5(os.path.join(self.folder, filename)) except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) + raise InvenioBibDocFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) else: for filename, md5hash in self.md5s.items(): try: if calculate_md5(os.path.join(self.folder, filename)) != md5hash: return False except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) + raise InvenioBibDocFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) return True def get_checksum(self, filename): """Return the checksum of a physical file.""" md5hash = self.md5s.get(filename, None) if md5hash is None: self.update() # Now it should not fail! md5hash = self.md5s[filename] return md5hash def calculate_md5_external(filename): """Calculate the md5 of a physical file through md5sum Command Line Tool. This is suitable for file larger than 256Kb.""" try: md5_result = os.popen(CFG_PATH_MD5SUM + ' -b %s' % escape_shell_arg(filename)) ret = md5_result.read()[:32] md5_result.close() if len(ret) != 32: # Error in running md5sum. Let's fallback to internal # algorithm. 
return calculate_md5(filename, force_internal=True) else: return ret except Exception, e: - raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) + raise InvenioBibDocFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) def calculate_md5(filename, force_internal=False): """Calculate the md5 of a physical file. This is suitable for files smaller than 256Kb.""" if not CFG_PATH_MD5SUM or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD: try: to_be_read = open(filename, "rb") computed_md5 = md5() while True: buf = to_be_read.read(CFG_BIBDOCFILE_MD5_BUFFER) if buf: computed_md5.update(buf) else: break to_be_read.close() return computed_md5.hexdigest() except Exception, e: register_exception() - raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) + raise InvenioBibDocFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) else: return calculate_md5_external(filename) def bibdocfile_url_to_bibrecdocs(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns a BibRecDocs object for the corresponding recid.""" recid = decompose_bibdocfile_url(url)[0] return BibRecDocs(recid) def bibdocfile_url_to_bibdoc(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns a BibDoc object for the corresponding recid/docname.""" docname = decompose_bibdocfile_url(url)[1] return bibdocfile_url_to_bibrecdocs(url).get_bibdoc(docname) def bibdocfile_url_to_bibdocfile(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... 
it returns a BibDocFile object for the corresponding recid/docname/format.""" dummy, dummy, format = decompose_bibdocfile_url(url) return bibdocfile_url_to_bibdoc(url).get_file(format) def bibdocfile_url_to_fullpath(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns the fullpath for the corresponding recid/docname/format.""" return bibdocfile_url_to_bibdocfile(url).get_full_path() def bibdocfile_url_p(url): """Return True when the url is a potential valid url pointing to a fulltext owned by a system.""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): return True if not (url.startswith('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)) or url.startswith('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD))): return False splitted_url = url.split('/files/') return len(splitted_url) == 2 and splitted_url[0] != '' and splitted_url[1] != '' def get_docid_from_bibdocfile_fullpath(fullpath): - """Given a bibdocfile fullpath (e.g. "CFG_WEBSUBMIT_FILEDIR/g0/123/bar.pdf;1") + """Given a bibdocfile fullpath (e.g. "CFG_BIBDOCFILE_FILEDIR/g0/123/bar.pdf;1") returns the docid (e.g. 123).""" - if not fullpath.startswith(os.path.join(CFG_WEBSUBMIT_FILEDIR, 'g')): - raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath + if not fullpath.startswith(os.path.join(CFG_BIBDOCFILE_FILEDIR, 'g')): + raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath dirname, base, extension, version = decompose_file_with_version(fullpath) try: return int(dirname.split('/')[-1]) except: - raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath + raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath def decompose_bibdocfile_fullpath(fullpath): - """Given a bibdocfile fullpath (e.g. 
"CFG_WEBSUBMIT_FILEDIR/g0/123/bar.pdf;1") + """Given a bibdocfile fullpath (e.g. "CFG_BIBDOCFILE_FILEDIR/g0/123/bar.pdf;1") returns a quadruple (recid, docname, format, version).""" - if not fullpath.startswith(os.path.join(CFG_WEBSUBMIT_FILEDIR, 'g')): - raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath + if not fullpath.startswith(os.path.join(CFG_BIBDOCFILE_FILEDIR, 'g')): + raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath dirname, base, extension, version = decompose_file_with_version(fullpath) try: docid = int(dirname.split('/')[-1]) bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() return recid, docname, extension, version except: - raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath + raise InvenioBibDocFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath def decompose_bibdocfile_url(url): """Given a bibdocfile_url return a triple (recid, docname, format).""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): return decompose_bibdocfile_very_old_url(url) if url.startswith('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)): recid_file = url[len('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)):] elif url.startswith('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD)): recid_file = url[len('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD)):] else: - raise InvenioWebSubmitFileError, "Url %s doesn't correspond to a valid record inside the system." % url + raise InvenioBibDocFileError, "Url %s doesn't correspond to a valid record inside the system." 
% url recid_file = recid_file.replace('/files/', '/') recid, docname, format = decompose_file(urllib.unquote(recid_file)) if not recid and docname.isdigit(): ## If the URL was something similar to CFG_SITE_URL/CFG_SITE_RECORD/123 return (int(docname), '', '') return (int(recid), docname, format) re_bibdocfile_old_url = re.compile(r'/%s/(\d*)/files/' % CFG_SITE_RECORD) def decompose_bibdocfile_old_url(url): """Given a bibdocfile old url (e.g. CFG_SITE_URL/CFG_SITE_RECORD/123/files) it returns the recid.""" g = re_bibdocfile_old_url.search(url) if g: return int(g.group(1)) - raise InvenioWebSubmitFileError('%s is not a valid old bibdocfile url' % url) + raise InvenioBibDocFileError('%s is not a valid old bibdocfile url' % url) def decompose_bibdocfile_very_old_url(url): """Decompose an old /getfile.py? URL""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): params = urllib.splitquery(url)[1] if params: try: params = cgi.parse_qs(params) if 'docid' in params: docid = int(params['docid'][0]) bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() elif 'recid' in params: recid = int(params['recid'][0]) if 'name' in params: docname = params['name'][0] else: docname = '' else: - raise InvenioWebSubmitFileError('%s has not enough params to correspond to a bibdocfile.' % url) + raise InvenioBibDocFileError('%s has not enough params to correspond to a bibdocfile.' % url) format = normalize_format(params.get('format', [''])[0]) return (recid, docname, format) except Exception, e: - raise InvenioWebSubmitFileError('Problem with %s: %s' % (url, e)) + raise InvenioBibDocFileError('Problem with %s: %s' % (url, e)) else: - raise InvenioWebSubmitFileError('%s has no params to correspond to a bibdocfile.' % url) + raise InvenioBibDocFileError('%s has no params to correspond to a bibdocfile.' 
% url) else: - raise InvenioWebSubmitFileError('%s is not a valid very old bibdocfile url' % url) + raise InvenioBibDocFileError('%s is not a valid very old bibdocfile url' % url) def get_docname_from_url(url): """Return a potential docname given a url""" path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] filename = os.path.split(path)[-1] return file_strip_ext(filename) def get_format_from_url(url): """Return a potential format given a url""" path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] filename = os.path.split(path)[-1] return filename[len(file_strip_ext(filename)):] def clean_url(url): """Given a local url e.g. a local path it render it a realpath.""" if is_url_a_local_file(url): path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] return os.path.abspath(path) else: return url def is_url_a_local_file(url): """Return True if the given URL is pointing to a local file.""" protocol = urllib2.urlparse.urlsplit(url)[0] return protocol in ('', 'file') def check_valid_url(url): """ Check for validity of a url or a file. @param url: the URL to check @type url: string @raise StandardError: if the URL is not a valid URL. """ try: if is_url_a_local_file(url): path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] if os.path.abspath(path) != path: raise StandardError, "%s is not a normalized path (would be %s)." % (path, os.path.normpath(path)) for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_WEBSUBMIT_STORAGEDIR]: if path.startswith(allowed_path): dummy_fd = open(path) dummy_fd.close() return raise StandardError, "%s is not in one of the allowed paths." % path else: try: open_url(url) except InvenioBibdocfileUnauthorizedURL, e: raise StandardError, str(e) except Exception, e: raise StandardError, "%s is not a correct url: %s" % (url, e) def safe_mkstemp(suffix, prefix='bibdocfile_'): """Create a temporary filename that don't have any '.' 
inside a part from the suffix.""" tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR) # Close the file and leave the responsability to the client code to # correctly open/close it. os.close(tmpfd) if '.' not in suffix: # Just in case format is empty return tmppath while '.' in os.path.basename(tmppath)[:-len(suffix)]: os.remove(tmppath) tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR) os.close(tmpfd) return tmppath def download_local_file(filename, format=None): """ Copies a local file to Invenio's temporary directory. @param filename: the name of the file to copy @type filename: string @param format: the format of the file to copy (will be found if not specified) @type format: string @return: the path of the temporary file created @rtype: string @raise StandardError: if something went wrong """ # Make sure the format is OK. if format is None: format = guess_format_from_url(filename) else: format = normalize_format(format) tmppath = '' # Now try to copy. try: path = urllib2.urlparse.urlsplit(urllib.unquote(filename))[2] if os.path.abspath(path) != path: raise StandardError, "%s is not a normalized path (would be %s)." \ % (path, os.path.normpath(path)) for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_WEBSUBMIT_STORAGEDIR]: if path.startswith(allowed_path): tmppath = safe_mkstemp(format) shutil.copy(path, tmppath) if os.path.getsize(tmppath) == 0: os.remove(tmppath) raise StandardError, "%s seems to be empty" % filename break else: raise StandardError, "%s is not in one of the allowed paths." % path except Exception, e: raise StandardError, "Impossible to copy the local file '%s': %s" % \ (filename, str(e)) return tmppath def download_external_url(url, format=None): """ Download a url (if it corresponds to a remote file) and return a local url to it. 
@param url: the URL to download @type url: string @param format: the format of the file (will be found if not specified) @type format: string @return: the path to the download local file @rtype: string @raise StandardError: if the download failed """ tmppath = None # Make sure the format is OK. if format is None: # First try to find a known extension to the URL format = decompose_file(url, skip_version=True, only_known_extensions=True)[2] if not format: # No correct format could be found. Will try to get it from the # HTTP message headers. format = '' else: format = normalize_format(format) from_file, to_file, tmppath = None, None, '' try: from_file = open_url(url) except InvenioBibdocfileUnauthorizedURL, e: raise StandardError, str(e) except urllib2.URLError, e: raise StandardError, 'URL could not be opened: %s' % str(e) if not format: # We could not determine the format from the URL, so let's try # to read it from the HTTP headers. format = get_format_from_http_response(from_file) try: tmppath = safe_mkstemp(format) to_file = open(tmppath, 'w') while True: block = from_file.read(CFG_BIBDOCFILE_BLOCK_SIZE) if not block: break to_file.write(block) to_file.close() from_file.close() if os.path.getsize(tmppath) == 0: raise StandardError, "%s seems to be empty" % url except Exception, e: # Try to close and remove the temporary file. try: to_file.close() except Exception: pass try: os.remove(tmppath) except Exception: pass raise StandardError, "Error when downloading %s into %s: %s" % \ (url, tmppath, e) return tmppath def get_format_from_http_response(response): """ Tries to retrieve the format of the file from the message headers of the HTTP response. 
@param response: the HTTP response @type response: file-like object (as returned by urllib.urlopen) @return: the format of the remote resource @rtype: string """ def parse_content_type(text): return text.split(';')[0].strip() def parse_content_disposition(text): for item in text.split(';'): item = item.strip() if item.strip().startswith('filename='): return item[len('filename="'):-len('"')] info = response.info() format = '' content_disposition = info.getheader('Content-Disposition') if content_disposition: filename = parse_content_disposition(content_disposition) if filename: format = decompose_file(filename)[2] content_type = info.getheader('Content-Type') if content_type: content_type = parse_content_type(content_type) ext = _mimes.guess_extension(content_type) if ext: format = normalize_format(ext) return format def download_url(url, format=None): """ Download a url (if it corresponds to a remote file) and return a local url to it. """ tmppath = None try: if is_url_a_local_file(url): tmppath = download_local_file(url, format=format) else: tmppath = download_external_url(url, format=format) except StandardError: raise return tmppath class BibDocMoreInfo: """ This class wraps contextual information of the documents, such as the - comments - descriptions - flags. Such information is kept separately per every format/version instance of the corresponding document and is searialized in the database, ready to be retrieved (but not searched). @param docid: the document identifier. @type docid: integer @param more_info: a serialized version of an already existing more_info object. If not specified this information will be readed from the database, and othewise an empty dictionary will be allocated. @raise ValueError: if docid is not a positive integer. @ivar docid: the document identifier as passed to the constructor. @type docid: integer @ivar more_info: the more_info dictionary that will hold all the additional document information. 
@type more_info: dict of dict of dict @note: in general this class is never instanciated in client code and never used outside bibdocfile module. @note: this class will be extended in the future to hold all the new auxiliary information about a document. """ def __init__(self, docid, more_info=None): if not (type(docid) in (long, int) and docid > 0): raise ValueError("docid is not a positive integer, but %s." % docid) self.docid = docid if more_info is None: res = run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (docid, )) if res and res[0][0]: self.more_info = cPickle.loads(blob_to_string(res[0][0])) else: self.more_info = {} else: self.more_info = cPickle.loads(more_info) if 'descriptions' not in self.more_info: self.more_info['descriptions'] = {} if 'comments' not in self.more_info: self.more_info['comments'] = {} if 'flags' not in self.more_info: self.more_info['flags'] = {} def __repr__(self): """ @return: the canonical string representation of the C{BibDocMoreInfo}. @rtype: string """ return 'BibDocMoreInfo(%i, %s)' % (self.docid, repr(cPickle.dumps(self.more_info))) def flush(self): """ Flush this object to the database. """ run_sql('UPDATE bibdoc SET more_info=%s WHERE id=%s', (cPickle.dumps(self.more_info), self.docid)) def set_flag(self, flagname, format, version): """ Sets a flag. @param flagname: the flag to set (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @type flagname: string @param format: the format for which the flag should set. 
@type format: string @param version: the version for which the flag should set: @type version: integer @raise ValueError: if the flag is not in L{CFG_BIBDOCFILE_AVAILABLE_FLAGS} """ if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS: if not flagname in self.more_info['flags']: self.more_info['flags'][flagname] = {} if not version in self.more_info['flags'][flagname]: self.more_info['flags'][flagname][version] = {} if not format in self.more_info['flags'][flagname][version]: self.more_info['flags'][flagname][version][format] = {} self.more_info['flags'][flagname][version][format] = True self.flush() else: raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS) def get_comment(self, format, version): """ Returns the specified comment. @param format: the format for which the comment should be retrieved. @type format: string @param version: the version for which the comment should be retrieved. @type version: integer @return: the specified comment. @rtype: string """ try: assert(type(version) is int) format = normalize_format(format) return self.more_info['comments'].get(version, {}).get(format) except: register_exception() raise def get_description(self, format, version): """ Returns the specified description. @param format: the format for which the description should be retrieved. @type format: string @param version: the version for which the description should be retrieved. @type version: integer @return: the specified description. @rtype: string """ try: assert(type(version) is int) format = normalize_format(format) return self.more_info['descriptions'].get(version, {}).get(format) except: register_exception() raise def has_flag(self, flagname, format, version): """ Return True if the corresponding has been set. @param flagname: the name of the flag (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @type flagname: string @param format: the format for which the flag should be checked. 
@type format: string @param version: the version for which the flag should be checked. @type version: integer @return: True if the flag is set for the given format/version. @rtype: bool @raise ValueError: if the flagname is not in L{CFG_BIBDOCFILE_AVAILABLE_FLAGS} """ if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS: return self.more_info['flags'].get(flagname, {}).get(version, {}).get(format, False) else: raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS) def get_flags(self, format, version): """ Return the list of all the enabled flags. @param format: the format for which the list should be returned. @type format: string @param version: the version for which the list should be returned. @type version: integer @return: the list of enabled flags (from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @rtype: list of string """ return [flag for flag in self.more_info['flags'] if format in self.more_info['flags'][flag].get(version, {})] def set_comment(self, comment, format, version): """ Set a comment. @param comment: the comment to be set. @type comment: string @param format: the format for which the comment should be set. @type format: string @param version: the version for which the comment should be set: @type version: integer """ try: assert(type(version) is int and version > 0) format = normalize_format(format) if comment == KEEP_OLD_VALUE: comment = self.get_comment(format, version) or self.get_comment(format, version - 1) if not comment: self.unset_comment(format, version) self.flush() return if not version in self.more_info['comments']: self.more_info['comments'][version] = {} self.more_info['comments'][version][format] = comment self.flush() except: register_exception() raise def set_description(self, description, format, version): """ Set a description. @param description: the description to be set. @type description: string @param format: the format for which the description should be set. 
@type format: string @param version: the version for which the description should be set: @type version: integer """ try: assert(type(version) is int and version > 0) format = normalize_format(format) if description == KEEP_OLD_VALUE: description = self.get_description(format, version) or self.get_description(format, version - 1) if not description: self.unset_description(format, version) self.flush() return if not version in self.more_info['descriptions']: self.more_info['descriptions'][version] = {} self.more_info['descriptions'][version][format] = description self.flush() except: register_exception() raise def unset_comment(self, format, version): """ Unset a comment. @param format: the format for which the comment should be unset. @type format: string @param version: the version for which the comment should be unset: @type version: integer """ try: assert(type(version) is int and version > 0) del self.more_info['comments'][version][format] self.flush() except KeyError: pass except: register_exception() raise def unset_description(self, format, version): """ Unset a description. @param format: the format for which the description should be unset. @type format: string @param version: the version for which the description should be unset: @type version: integer """ try: assert(type(version) is int and version > 0) del self.more_info['descriptions'][version][format] self.flush() except KeyError: pass except: register_exception() raise def unset_flag(self, flagname, format, version): """ Unset a flag. @param flagname: the flag to be unset (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @type flagname: string @param format: the format for which the flag should be unset. 
@type format: string @param version: the version for which the flag should be unset: @type version: integer @raise ValueError: if the flag is not in L{CFG_BIBDOCFILE_AVAILABLE_FLAGS} """ if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS: try: del self.more_info['flags'][flagname][version][format] self.flush() except KeyError: pass else: raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS) def serialize(self): """ @return: the serialized version of this object. @rtype: string """ return cPickle.dumps(self.more_info) def readfile(filename): """ Read a file. @param filename: the name of the file to be read. @type filename: string @return: the text contained in the file. @rtype: string @note: Returns empty string in case of any error. @note: this function is useful for quick implementation of websubmit functions. """ try: return open(filename).read() except Exception: return '' class HeadRequest(urllib2.Request): """ A request object to perform a HEAD request. """ def get_method(self): return 'HEAD' def read_cookie(cookiefile): """ Parses a cookie file and returns a string as needed for the urllib2 headers The file should respect the Netscape cookie specifications """ cookie_data = '' cfile = open(cookiefile, 'r') for line in cfile.readlines(): tokens = line.split('\t') if len(tokens) == 7: # we are on a cookie line cookie_data += '%s=%s; ' % (tokens[5], tokens[6].replace('\n', '')) cfile.close() return cookie_data def open_url(url, headers=None, head_request=False): """ Opens a URL. If headers are passed as argument, no check is performed and the URL will be opened. Otherwise checks if the URL is present in CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS and uses the headers specified in the config variable. 
@param url: the URL to open @type url: string @param headers: the headers to use @type headers: dictionary @param head_request: if True, perform a HEAD request, otherwise a POST request @type head_request: boolean @return: a file-like object as returned by urllib2.urlopen. """ headers_to_use = None if headers is None: for regex, headers in _CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS: if regex.match(url) is not None: headers_to_use = headers break if headers_to_use is None: # URL is not allowed. raise InvenioBibdocfileUnauthorizedURL, "%s is not an authorized " \ "external URL." % url else: headers_to_use = headers request_obj = head_request and HeadRequest or urllib2.Request request = request_obj(url) request.add_header('User-Agent', make_user_agent_string('bibdocfile')) for key, value in headers_to_use.items(): try: value = globals()[value['fnc']](**value['args']) except (KeyError, TypeError): pass request.add_header(key, value) return urllib2.urlopen(request) def update_modification_date_of_file(filepath, modification_date): """Update the modification time and date of the file with the modification_date @param filepath: the full path of the file that needs to be updated @type filepath: string @param modification_date: the new modification date and time @type modification_date: datetime.datetime object """ try: modif_date_in_seconds = time.mktime(modification_date.timetuple()) # try to get the time in seconds except (AttributeError, TypeError): modif_date_in_seconds = 0 if modif_date_in_seconds: statinfo = os.stat(filepath) # we need to keep the same access time os.utime(filepath, (statinfo.st_atime, modif_date_in_seconds)) #update the modification time diff --git a/modules/bibdocfile/lib/bibdocfile_config.py b/modules/bibdocfile/lib/bibdocfile_config.py new file mode 100644 index 000000000..b547d095e --- /dev/null +++ b/modules/bibdocfile/lib/bibdocfile_config.py @@ -0,0 +1,71 @@ +## This file is part of Invenio. +## Copyright (C) 2012 CERN. 
+## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +import re + +try: + from invenio.config import CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC +except ImportError: + CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC = { + 'can_revise_doctypes': ['*'], + 'can_comment_doctypes': ['*'], + 'can_describe_doctypes': ['*'], + 'can_delete_doctypes': ['*'], + 'can_keep_doctypes': ['*'], + 'can_rename_doctypes': ['*'], + 'can_add_format_to_doctypes': ['*'], + 'can_restrict_doctypes': ['*']} + +try: + from invenio.config import CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES +except ImportError: + CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES = [ + ('Main', 'Main document'), + ('LaTeX', 'LaTeX'), + ('Source', 'Source'), + ('Additional', 'Additional File'), + ('Audio', 'Audio file'), + ('Video', 'Video file'), + ('Script', 'Script'), + ('Data', 'Data'), + ('Figure', 'Figure'), + ('Schema', 'Schema'), + ('Graph', 'Graph'), + ('Image', 'Image'), + ('Drawing', 'Drawing'), + ('Slides', 'Slides')] + +try: + from invenio.config import CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS +except ImportError: + CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS = [ + ('', 'Public'), + ('restricted', 'Restricted')] + +## CFG_BIBDOCFILE_ICON_SUBFORMAT_RE -- a subformat is an Invenio concept to give +## file formats more semantic. 
For example "foo.gif;icon" has ".gif;icon" +## 'format', ".gif" 'superformat' and "icon" 'subformat'. That means that this +## particular format/instance of the "foo" document, not only is a ".gif" but +## is in the shape of an "icon", i.e. most probably it will be low-resolution. +## This configuration variable let the administrator to decide which implicit +## convention will be used to know which formats will be meant to be used +## as an icon. +CFG_BIBDOCFILE_ICON_SUBFORMAT_RE = re.compile(r"icon.*") + +## CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT -- this is the default subformat used +## when creating new icons. +CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT = "icon" diff --git a/modules/websubmit/lib/websubmit_managedocfiles.py b/modules/bibdocfile/lib/bibdocfile_managedocfiles.py similarity index 98% rename from modules/websubmit/lib/websubmit_managedocfiles.py rename to modules/bibdocfile/lib/bibdocfile_managedocfiles.py index fef3ce0f3..d71eda21b 100644 --- a/modules/websubmit/lib/websubmit_managedocfiles.py +++ b/modules/bibdocfile/lib/bibdocfile_managedocfiles.py @@ -1,2783 +1,2786 @@ ## $Id: Revise_Files.py,v 1.37 2009/03/26 15:11:05 jerome Exp $ ## This file is part of Invenio. ## Copyright (C) 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
""" -WebSubmit Upload File Interface utils +BibDocFile Upload File Interface utils ===================================== Tools to help with creation of file management interfaces. Contains the two main functions `create_file_upload_interface' and `move_uploaded_files_to_storage', which must be run one after the other: - create_file_upload_interface: Generates the HTML of an interface to revise files of a given record. The actions on the files are recorded in a working directory, but not applied to the record. - move_uploaded_files_to_storage: Applies/executes the modifications on files as recorded by the `create_file_upload_interface' function. Theses functions are a complex interplay of HTML, Javascript and HTTP requests. They are not meant to be used in any type of scenario, but require to be used in extremely specific contexts (Currently in -WebSubmit Response Elements, WebSubmit functions and the WebSubmit +WebSubmit Response Elements, WebSubmit functions and the BibDocFile File Management interface). NOTES: ====== - Comments are not considered as a property of bibdocfiles, but bibdocs: this conflicts with the APIs FIXME: ====== - refactor into smaller components. Eg. form processing in create_file_upload_interface could be move outside the function. - better differentiate between revised file, and added format (currently when adding a format, the whole bibdoc is marked as updated, and all links are removed) - After a file has been revised or added, add a 'check' icon - One issue: if we allow deletion or renaming, we might lose track of a bibdoc: someone adds X, renames X->Y, and adds again another file with name X: when executing actions, we will add the second X, and rename it to Y -> need to go back in previous action when renaming... or check that name has never been used.. 
DEPENDENCIES: ============= - jQuery Form plugin U{http://jquery.malsup.com/form/} """ import cPickle import os import time import cgi from urllib import urlencode from invenio.config import \ CFG_SITE_LANG, \ CFG_SITE_URL, \ CFG_WEBSUBMIT_STORAGEDIR, \ CFG_TMPSHAREDDIR, \ CFG_SITE_SUPPORT_EMAIL, \ - CFG_CERN_SITE + CFG_CERN_SITE, \ + CFG_SITE_RECORD from invenio.messages import gettext_set_language from invenio.bibdocfilecli import cli_fix_marc from invenio.bibdocfile import BibRecDocs, \ decompose_file, calculate_md5, BibDocFile, \ - InvenioWebSubmitFileError, BibDocMoreInfo + InvenioBibDocFileError, BibDocMoreInfo from invenio.websubmit_functions.Shared_Functions import \ createRelatedFormats from invenio.errorlib import register_exception from invenio.dbquery import run_sql from invenio.websubmit_icon_creator import \ create_icon, InvenioWebSubmitIconCreatorError from invenio.urlutils import create_html_mailto -from invenio.websubmit_config import CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT +from invenio.bibdocfile_config import CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT CFG_ALLOWED_ACTIONS = ['revise', 'delete', 'add', 'addFormat'] def create_file_upload_interface(recid, form=None, print_outside_form_tag=True, print_envelope=True, include_headers=False, ln=CFG_SITE_LANG, minsize='', maxsize='', doctypes_and_desc=None, can_delete_doctypes=None, can_revise_doctypes=None, can_describe_doctypes=None, can_comment_doctypes=None, can_keep_doctypes=None, can_rename_doctypes=None, can_add_format_to_doctypes=None, create_related_formats=True, can_name_new_files=True, keep_default=True, show_links=True, file_label=None, filename_label=None, description_label=None, comment_label=None, restrictions_and_desc=None, can_restrict_doctypes=None, restriction_label=None, doctypes_to_default_filename=None, max_files_for_doctype=None, sbm_indir=None, sbm_doctype=None, sbm_access=None, uid=None, sbm_curdir=None, display_hidden_files=False, protect_hidden_files=True): """ Returns the HTML for 
the file upload interface. @param recid: the id of the record to edit files @type recid: int or None @param form: the form sent by the user's browser in response to a user action. This is used to read and record user's actions. @param form: as returned by the interface handler. @param print_outside_form_tag: display encapsulating
tag or not @type print_outside_form_tag: boolean @param print_envelope: (internal parameter) if True, return the encapsulating initial markup, otherwise skip it. @type print_envelope: boolean @param include_headers: include javascript and css headers in the body of the page. If you set this to False, you must take care of including these headers in your page header. Setting this parameter to True is useful if you cannot change the page header. @type include_headers: boolean @param ln: language @type ln: string @param minsize: the minimum size (in bytes) allowed for the uploaded files. Files not big enough are discarded. @type minsize: int @param maxsize: the maximum size (in bytes) allowed for the uploaded files. Files too big are discarded. @type maxsize: int @param doctypes_and_desc: the list of doctypes (like 'Main' or 'Additional') and their description that users can choose from when adding new files. - When no value is provided, users cannot add new file (they can only revise/delete/add format) - When a single value is given, it is used as default doctype for all new documents Order is relevant Eg: [('main', 'Main document'), ('additional', 'Figure, schema. etc')] @type doctypes_and_desc: list(tuple(string, string)) @param restrictions_and_desc: the list of restrictions (like 'Restricted' or 'No Restriction') and their description that users can choose from when adding or revising files. Restrictions can then be configured at the level of WebAccess. - When no value is provided, no restriction is applied - When a single value is given, it is used as default resctriction for all documents. - The first value of the list is used as default restriction if the user if not given the choice of the restriction. Order is relevant Eg: [('', 'No restriction'), ('restr', 'Restricted')] @type restrictions_and_desc: list(tuple(string, string)) @param can_delete_doctypes: the list of doctypes that users are allowed to delete. 
Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_delete_doctypes: list(string) @param can_revise_doctypes: the list of doctypes that users are allowed to revise Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_revise_doctypes: list(string) @param can_describe_doctypes: the list of doctypes that users are allowed to describe Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_describe_doctypes: list(string) @param can_comment_doctypes: the list of doctypes that users are allowed to comment Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_comment_doctypes: list(string) @param can_keep_doctypes: the list of doctypes for which users can choose to keep previous versions visible when revising a file (i.e. 'Keep previous version' checkbox). See also parameter 'keepDefault'. Note that this parameter is ~ignored when revising the attributes of a file (comment, description) without uploading a new file. See also parameter Move_Uploaded_Files_to_Storage.force_file_revision Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_keep_doctypes: list(string) @param can_add_format_to_doctypes: the list of doctypes for which users can add new formats. If there is no value, then no 'add format' link nor warning about losing old formats are displayed. Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_add_format_to_doctypes: list(string) @param can_restrict_doctypes: the list of doctypes for which users can choose the access restrictions when adding or revising a file. If no value is given: - no restriction is applied if none is defined in the 'restrictions' parameter. - else the *first* value of the 'restrictions' parameter is used as default restriction. 
Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_restrict_doctypes : list(string) @param can_rename_doctypes: the list of doctypes that users are allowed to rename (when revising) Eg: ['main', 'additional'] Use ['*'] for "all doctypes" @type can_rename_doctypes: list(string) @param can_name_new_files: if user can choose the name of the files they upload or not @type can_name_new_files: boolean @param doctypes_to_default_filename: Rename uploaded files to admin-chosen values. To rename to a value found in a file in curdir, use 'file:' prefix to specify the file to read from. Eg: {'main': 'file:RN', 'additional': 'foo'} If the same doctype is submitted several times, a"-%i" suffix is added to the name defined in the file. When using 'file:' prefix, the name is only resolved at the end of the submission, when attaching the file. The default filenames are overriden by user-chosen names if you allow 'can_name_new_files' or 'can_rename_doctypes', excepted if the name is prefixed with 'file:'. @type doctypes_to_default_filename: dict @param max_files_for_doctype: the maximum number of files that users can upload for each doctype. Eg: {'main': 1, 'additional': 2} Do not specify the doctype here to have an unlimited number of files for a given doctype. @type max_files_for_doctype: dict @param create_related_formats: if uploaded files get converted to whatever format we can or not @type create_related_formats: boolean @param keep_default: the default behaviour for keeping or not previous version of files when users cannot choose (no value in can_keep_doctypes). Note that this parameter is ignored when revising the attributes of a file (comment, description) without uploading a new file. 
See also parameter Move_Uploaded_Files_to_Storage.force_file_revision @type keep_default: boolean @param show_links: if we display links to files when possible or not @type show_links: boolean @param file_label: the label for the file field @type file_label: string @param filename_label: the label for the file name field @type filename_label: string @param description_label: the label for the description field @type description_label: string @param comment_label: the label for the comments field @type comment_label: string @param restriction_label: the label in front of the restrictions list @type restriction_label: string @param sbm_indir: the submission indir parameter, in case the function is used in a WebSubmit submission context. This value will be used to retrieve where to read the current state of the interface and store uploaded files @type sbm_indir : string @param sbm_doctype: the submission doctype parameter, in case the function is used in a WebSubmit submission context. This value will be used to retrieve where to read the current state of the interface and store uploaded files @type sbm_doctype: string @param sbm_access: the submission access parameter. Must be specified in the context of WebSubmit submission, as well when used in the WebSubmit Admin file management interface. This value will be used to retrieve where to read the current state of the interface and store uploaded files @type sbm_access: string @param sbm_curdir: the submission curdir parameter. Must be specified in the context of WebSubmit function Create_Upload_File_Interface. This value will be used to retrieve where to read the current state of the interface and store uploaded files. @type sbm_curdir: string @param uid: the user id @type uid: int @param display_hidden_files: if bibdoc containing bibdocfiles flagged as 'HIDDEN' should be displayed or not. 
@type display_hidden_files: boolean @param protect_hidden_files: if bibdoc containing bibdocfiles flagged as 'HIDDEN' can be edited (revise, delete, add format) or not. @type protect_hidden_files: boolean @return Tuple (errorcode, html) """ # Clean and set up a few parameters _ = gettext_set_language(ln) body = '' if not file_label: file_label = _('Choose a file') if not filename_label: filename_label = _('Name') if not description_label: description_label = _('Description') if not comment_label: comment_label = _('Comment') if not restriction_label: restriction_label = _('Access') if not doctypes_and_desc: doctypes_and_desc = [] if not can_delete_doctypes: can_delete_doctypes = [] if not can_revise_doctypes: can_revise_doctypes = [] if not can_describe_doctypes: can_describe_doctypes = [] if not can_comment_doctypes: can_comment_doctypes = [] if not can_keep_doctypes: can_keep_doctypes = [] if not can_rename_doctypes: can_rename_doctypes = [] if not can_add_format_to_doctypes: can_add_format_to_doctypes = [] if not restrictions_and_desc: restrictions_and_desc = [] if not can_restrict_doctypes: can_restrict_doctypes = [] if not doctypes_to_default_filename: doctypes_to_default_filename = {} if not max_files_for_doctype: max_files_for_doctype = {} doctypes = [doctype for (doctype, desc) in doctypes_and_desc] # Retrieve/build a working directory to save uploaded files and # states + configuration. working_dir = None if sbm_indir and sbm_doctype and sbm_access: # Write/read configuration to/from working_dir (WebSubmit mode). # Retrieve the interface configuration from the current # submission directory. working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, sbm_indir, sbm_doctype, sbm_access) try: assert(working_dir == os.path.abspath(working_dir)) except AssertionError: register_exception(prefix='Cannot create file upload interface: ' + \ + 'missing parameter ', alert_admin=True) return (1, "Unauthorized parameters") form_url_params = "?" 
+ urlencode({'access': sbm_access, 'indir': sbm_indir, 'doctype': sbm_doctype}) elif uid and sbm_access: # WebSubmit File Management (admin) interface mode. # Working directory is in CFG_TMPSHAREDDIR working_dir = os.path.join(CFG_TMPSHAREDDIR, 'websubmit_upload_interface_config_' + str(uid), sbm_access) try: assert(working_dir == os.path.abspath(working_dir)) except AssertionError: register_exception(prefix='Some user tried to access ' \ + working_dir + \ ' which is different than ' + \ os.path.abspath(working_dir), alert_admin=True) return (1, "Unauthorized parameters") if not os.path.exists(working_dir): os.makedirs(working_dir) form_url_params = "?" + urlencode({'access': sbm_access}) elif sbm_curdir: # WebSubmit Create_Upload_File_Interface.py function working_dir = sbm_curdir form_url_params = None else: register_exception(prefix='Some user tried to access ' \ + working_dir + \ ' which is different than ' + \ os.path.abspath(working_dir), alert_admin=True) return (1, "Unauthorized parameters") # Save interface configuration, if this is the first time we come # here, or else load parameters try: parameters = _read_file_revision_interface_configuration_from_disk(working_dir) (minsize, maxsize, doctypes_and_desc, doctypes, can_delete_doctypes, can_revise_doctypes, can_describe_doctypes, can_comment_doctypes, can_keep_doctypes, can_rename_doctypes, can_add_format_to_doctypes, create_related_formats, can_name_new_files, keep_default, show_links, file_label, filename_label, description_label, comment_label, restrictions_and_desc, can_restrict_doctypes, restriction_label, doctypes_to_default_filename, max_files_for_doctype, print_outside_form_tag, display_hidden_files, protect_hidden_files) = parameters except: # Initial display of the interface: save configuration to # disk for later reuse parameters = (minsize, maxsize, doctypes_and_desc, doctypes, can_delete_doctypes, can_revise_doctypes, can_describe_doctypes, can_comment_doctypes, can_keep_doctypes, 
can_rename_doctypes, can_add_format_to_doctypes, create_related_formats, can_name_new_files, keep_default, show_links, file_label, filename_label, description_label, comment_label, restrictions_and_desc, can_restrict_doctypes, restriction_label, doctypes_to_default_filename, max_files_for_doctype, print_outside_form_tag, display_hidden_files, protect_hidden_files) _write_file_revision_interface_configuration_to_disk(working_dir, parameters) # Get the existing bibdocs as well as the actions performed during # the former revise sessions of the user, to build an updated list # of documents. We will use it to check if last action performed # by user is allowed. performed_actions = read_actions_log(working_dir) if recid: bibrecdocs = BibRecDocs(recid) # Create the list of files based on current files and performed # actions bibdocs = bibrecdocs.list_bibdocs() else: bibdocs = [] # "merge": abstract_bibdocs = build_updated_files_list(bibdocs, performed_actions, recid or -1, display_hidden_files) # If any, process form submitted by user if form: ## Get and clean parameters received from user (file_action, file_target, file_target_doctype, keep_previous_files, file_description, file_comment, file_rename, file_doctype, file_restriction, uploaded_filename, uploaded_filepath) = \ wash_form_parameters(form, abstract_bibdocs, can_keep_doctypes, keep_default, can_describe_doctypes, can_comment_doctypes, can_rename_doctypes, can_name_new_files, can_restrict_doctypes, doctypes_to_default_filename, working_dir) if protect_hidden_files and \ (file_action in ['revise', 'addFormat', 'delete']) and \ is_hidden_for_docname(file_target, abstract_bibdocs): # Sanity check. We should not let editing file_action = '' body += '' % \ _("The file you want to edit is protected against modifications. 
Your action has not been applied") ## Check the last action performed by user, and log it if ## everything is ok if uploaded_filepath and \ ((file_action == 'add' and (file_doctype in doctypes)) or \ (file_action == 'revise' and \ ((file_target_doctype in can_revise_doctypes) or \ '*' in can_revise_doctypes)) or (file_action == 'addFormat' and \ ((file_target_doctype in can_add_format_to_doctypes) or \ '*' in can_add_format_to_doctypes))): # A file has been uploaded (user has revised or added a file, # or a format) dirname, filename, extension = decompose_file(uploaded_filepath) os.unlink("%s/myfile" % working_dir) if minsize.isdigit() and os.path.getsize(uploaded_filepath) < int(minsize): os.unlink(uploaded_filepath) body += '' % \ (_("The uploaded file is too small (<%i o) and has therefore not been considered") % \ int(minsize)).replace('"', '\\"') elif maxsize.isdigit() and os.path.getsize(uploaded_filepath) > int(maxsize): os.unlink(uploaded_filepath) body += '' % \ (_("The uploaded file is too big (>%i o) and has therefore not been considered") % \ int(maxsize)).replace('"', '\\"') elif len(filename) + len(extension) + 4 > 255: # Max filename = 256, including extension and version that # will be appended later by BibDoc os.unlink(uploaded_filepath) body += '' % \ _("The uploaded file name is too long and has therefore not been considered").replace('"', '\\"') elif file_action == 'add' and \ max_files_for_doctype.has_key(file_doctype) and \ max_files_for_doctype[file_doctype] < \ (len([bibdoc for bibdoc in abstract_bibdocs \ if bibdoc['get_type'] == file_doctype]) + 1): # User has tried to upload more than allowed for this # doctype. 
Should never happen, unless the user did some # nasty things os.unlink(uploaded_filepath) body += '' % \ _("You have already reached the maximum number of files for this type of document").replace('"', '\\"') else: # Prepare to move file to # working_dir/files/updated/doctype/bibdocname/ folder_doctype = file_doctype or \ bibrecdocs.get_bibdoc(file_target).get_type() folder_bibdocname = file_rename or file_target or filename new_uploaded_filepath = os.path.join(working_dir, 'files', 'updated', folder_doctype, folder_bibdocname, uploaded_filename) # First check that we do not conflict with an already # existing bibdoc name if file_action == "add" and \ ((filename in [bibdoc['get_docname'] for bibdoc \ in abstract_bibdocs] and not file_rename) or \ file_rename in [bibdoc['get_docname'] for bibdoc \ in abstract_bibdocs]): # A file with that name already exist. Cancel action # and tell user. os.unlink(uploaded_filepath) body += '' % \ (_("A file named %s already exists. Please choose another name.") % \ (file_rename or filename)).replace('"', '\\"') elif file_action == "revise" and \ file_rename != file_target and \ file_rename in [bibdoc['get_docname'] for bibdoc \ in abstract_bibdocs]: # A file different from the one to revise already has # the same bibdocname os.unlink(uploaded_filepath) body += '' % \ (_("A file named %s already exists. Please choose another name.") % \ file_rename).replace('"', '\\"') elif file_action == "addFormat" and \ (extension in \ get_extensions_for_docname(file_target, abstract_bibdocs)): # A file with that extension already exists. Cancel # action and tell user. os.unlink(uploaded_filepath) body += '' % \ (_("A file with format '%s' already exists. Please upload another format.") % \ extension).replace('"', '\\"') elif '.' 
in file_rename or '/' in file_rename or "\\" in file_rename or \ not os.path.abspath(new_uploaded_filepath).startswith(os.path.join(working_dir, 'files', 'updated')): # We forbid usage of a few characters, for the good of # everybody... os.unlink(uploaded_filepath) body += '' % \ _("You are not allowed to use dot '.', slash '/', or backslash '\\\\' in file names. Choose a different name and upload your file again. In particular, note that you should not include the extension in the renaming field.").replace('"', '\\"') else: # No conflict with file name # When revising, delete previously uploaded files for # this entry, so that we do not execute the # corresponding action if file_action == "revise": for path_to_delete in \ get_uploaded_files_for_docname(working_dir, file_target): delete_file(working_dir, path_to_delete) # Move uploaded file to working_dir/files/updated/doctype/bibdocname/ os.renames(uploaded_filepath, new_uploaded_filepath) if file_action == "add": # no need to check bibrecdocs.check_file_exists(new_uploaded_filepath, new_uploaded_format): was done before # Log if file_rename != '': # at this point, bibdocname is specified # name, no need to 'rename' filename = file_rename log_action(working_dir, file_action, filename, new_uploaded_filepath, file_rename, file_description, file_comment, file_doctype, keep_previous_files, file_restriction) # Automatically create additional formats when # possible. 
additional_formats = [] if create_related_formats: additional_formats = createRelatedFormats(new_uploaded_filepath, overwrite=False) for additional_format in additional_formats: # Log log_action(working_dir, 'addFormat', filename, additional_format, file_rename, file_description, file_comment, file_doctype, True, file_restriction) if file_action == "revise" and file_target != "": # Log log_action(working_dir, file_action, file_target, new_uploaded_filepath, file_rename, file_description, file_comment, file_target_doctype, keep_previous_files, file_restriction) # Automatically create additional formats when # possible. additional_formats = [] if create_related_formats: additional_formats = createRelatedFormats(new_uploaded_filepath, overwrite=False) for additional_format in additional_formats: # Log log_action(working_dir, 'addFormat', (file_rename or file_target), additional_format, file_rename, file_description, file_comment, file_target_doctype, True, file_restriction) if file_action == "addFormat" and file_target != "": # We have already checked above that this format does # not already exist. # Log log_action(working_dir, file_action, file_target, new_uploaded_filepath, file_rename, file_description, file_comment, file_target_doctype, keep_previous_files, file_restriction) elif file_action in ["add", "addFormat"]: # No file found, but action involved adding file: ask user to # select a file body += """""" elif file_action == "revise" and file_target != "": # User has chosen to revise attributes of a file (comment, # name, etc.) without revising the file itself. if file_rename != file_target and \ file_rename in [bibdoc['get_docname'] for bibdoc \ in abstract_bibdocs]: # A file different from the one to revise already has # the same bibdocname body += '' % \ (_("A file named %s already exists. 
Please choose another name.") % \ file_rename).replace('"', '\\"') else: # Log log_action(working_dir, file_action, file_target, "", file_rename, file_description, file_comment, file_target_doctype, keep_previous_files, file_restriction) elif file_action == "delete" and file_target != "" and \ ((file_target_doctype in can_delete_doctypes) or \ '*' in can_delete_doctypes): # Delete previously uploaded files for this entry for path_to_delete in get_uploaded_files_for_docname(working_dir, file_target): delete_file(working_dir, path_to_delete) # Log log_action(working_dir, file_action, file_target, "", file_rename, file_description, file_comment, "", keep_previous_files, file_restriction) ## Display performed_actions = read_actions_log(working_dir) #performed_actions = [] if recid: bibrecdocs = BibRecDocs(recid) # Create the list of files based on current files and performed # actions bibdocs = bibrecdocs.list_bibdocs() else: bibdocs = [] abstract_bibdocs = build_updated_files_list(bibdocs, performed_actions, recid or -1, display_hidden_files) abstract_bibdocs.sort(lambda x, y: x['order'] - y['order']) # Display form and necessary CSS + Javscript #body += '
' #body += css js_can_describe_doctypes = repr({}.fromkeys(can_describe_doctypes, '')) js_can_comment_doctypes = repr({}.fromkeys(can_comment_doctypes, '')) js_can_restrict_doctypes = repr({}.fromkeys(can_restrict_doctypes, '')) # Prepare to display file revise panel "balloon". Check if we # should display the list of doctypes or if it is not necessary (0 # or 1 doctype). Also make sure that we do not exceed the maximum # number of files specified per doctype. The markup of the list of # doctypes is prepared here, and will be passed as parameter to # the display_revise_panel function cleaned_doctypes = [doctype for doctype in doctypes if not max_files_for_doctype.has_key(doctype) or (max_files_for_doctype[doctype] > \ len([bibdoc for bibdoc in abstract_bibdocs \ if bibdoc['get_type'] == doctype]))] doctypes_list = "" if len(cleaned_doctypes) > 1: doctypes_list = '' elif len(cleaned_doctypes) == 1: doctypes_list = '' % cleaned_doctypes[0] # Check if we should display the list of access restrictions or if # it is not necessary restrictions_list = "" if len(restrictions_and_desc) > 1: restrictions_list = '' restrictions_list = ''' %(restrictions_list)s [?]''' % \ {'restrictions_list': restrictions_list, 'restriction_label': restriction_label, 'restriction_help': _('Choose how you want to restrict access to this file.').replace("'", "\\'")} elif len(restrictions_and_desc) == 1: restrictions_list = '' % { 'restriction': cgi.escape(restrictions_and_desc[0][0]), 'restriction_attr': cgi.escape(restrictions_and_desc[0][0], True) } else: restrictions_list = '' # List the files body += '''
''' i = 0 for bibdoc in abstract_bibdocs: if bibdoc['list_latest_files']: i += 1 body += create_file_row(bibdoc, can_delete_doctypes, can_rename_doctypes, can_revise_doctypes, can_describe_doctypes, can_comment_doctypes, can_keep_doctypes, can_add_format_to_doctypes, doctypes_list, show_links, can_restrict_doctypes, even=not (i % 2), ln=ln, form_url_params=form_url_params, protect_hidden_files=protect_hidden_files) body += '
' if len(cleaned_doctypes) > 0: body += '''''' % \ {'display_revise_panel':javascript_display_revise_panel(action='add', target='', show_doctypes=True, show_keep_previous_versions=False, show_rename=can_name_new_files, show_description=True, show_comment=True, bibdocname='', description='', comment='', show_restrictions=True, restriction=len(restrictions_and_desc) > 0 and restrictions_and_desc[0][0] or '', doctypes=doctypes_list), 'defaultSelectedDoctype': cleaned_doctypes[0], 'add_new_file': _("Add new file"), 'can_describe_doctypes':js_can_describe_doctypes, 'can_comment_doctypes': repr({}.fromkeys(can_comment_doctypes, '')), 'can_restrict_doctypes': repr({}.fromkeys(can_restrict_doctypes, ''))} body += '
' if print_envelope: # We should print this only if we display for the first time body = '
' + body + '
' if include_headers: body = get_upload_file_interface_javascript(form_url_params) + \ get_upload_file_interface_css() + \ body # Display markup of the revision panel. This one is also # printed only at the beginning, so that it does not need to # be returned with each response body += revise_balloon % \ {'CFG_SITE_URL': CFG_SITE_URL, 'file_label': file_label, 'filename_label': filename_label, 'description_label': description_label, 'comment_label': comment_label, 'restrictions': restrictions_list, 'previous_versions_help': _('You can decide to hide or not previous version(s) of this file.').replace("'", "\\'"), 'revise_format_help': _('When you revise a file, the additional formats that you might have previously uploaded are removed, since they no longer up-to-date with the new file.').replace("'", "\\'"), 'revise_format_warning': _('Alternative formats uploaded for current version of this file will be removed'), 'previous_versions_label': _('Keep previous versions'), 'cancel': _('Cancel'), 'upload': _('Upload')} body += ''' ''' % \ {'recid': recid or -1, 'ln': ln} # End submission button if sbm_curdir: body += '''
''' % \ {'apply_changes': _("Apply changes")} # Display a link to support email in case users have problem # revising/adding files mailto_link = create_html_mailto(email=CFG_SITE_SUPPORT_EMAIL, subject=_("Need help revising or adding files to record %(recid)s") % \ {'recid': recid or ''}, body=_("""Dear Support, I would need help to revise or add a file to record %(recid)s. I have attached the new version to this email. Best regards""") % {'recid': recid or ''}) problem_revising = _('Having a problem revising a file? Send the revised version to %(mailto_link)s.') % {'mailto_link': mailto_link} if len(cleaned_doctypes) > 0: # We can add files, so change note problem_revising = _('Having a problem adding or revising a file? Send the new/revised version to %(mailto_link)s.') % {'mailto_link': mailto_link} body += '
' body += problem_revising if print_envelope and print_outside_form_tag: - body = '' + body + '' + body = '
' % CFG_SITE_RECORD + body + '
' return (0, body) def create_file_row(abstract_bibdoc, can_delete_doctypes, can_rename_doctypes, can_revise_doctypes, can_describe_doctypes, can_comment_doctypes, can_keep_doctypes, can_add_format_to_doctypes, doctypes_list, show_links, can_restrict_doctypes, even=False, ln=CFG_SITE_LANG, form_url_params='', protect_hidden_files=True): """ Creates a row in the files list representing the given abstract_bibdoc @param abstract_bibdoc: list of "fake" BibDocs: it is a list of dictionaries with keys 'list_latest_files' and 'get_docname' with values corresponding to what you would expect to receive when calling their counterpart function on a real BibDoc object. @param can_delete_doctypes: list of doctypes for which we allow users to delete documents @param can_revise_doctypes: the list of doctypes that users are allowed to revise. @param can_describe_doctypes: the list of doctypes that users are allowed to describe. @param can_comment_doctypes: the list of doctypes that users are allowed to comment. @param can_keep_doctypes: the list of doctypes for which users can choose to keep previous versions visible when revising a file (i.e. 'Keep previous version' checkbox). @param can_rename_doctypes: the list of doctypes that users are allowed to rename (when revising) @param can_add_format_to_doctypes: the list of doctypes for which users can add new formats @param show_links: if we display links to files @param even: if the row is even or odd on the list @type even: boolean @param ln: language @type ln: string @param form_url_params: the @type form_url_params: string @param protect_hidden_files: if bibdoc containing bibdocfiles flagged as 'HIDDEN' can be edited (revise, delete, add format) or not. @type protect_hidden_files: boolean @return: an HTML formatted "file" row @rtype: string """ _ = gettext_set_language(ln) # Try to retrieve "main format", to display as link for the # file. 
    # There is no such concept in BibDoc, but let's just try to
    # get the pdf file if it exists
    main_bibdocfile = [bibdocfile for bibdocfile in abstract_bibdoc['list_latest_files'] \
                       if bibdocfile.get_format().strip('.').lower() == 'pdf']
    if len(main_bibdocfile) > 0:
        main_bibdocfile = main_bibdocfile[0]
    else:
        # No PDF available: fall back to the first latest file
        main_bibdocfile = abstract_bibdoc['list_latest_files'][0]

    main_bibdocfile_description = main_bibdocfile.get_description()
    if main_bibdocfile_description is None:
        main_bibdocfile_description = ''

    updated = abstract_bibdoc['updated'] # Has BibDoc been updated?
    hidden_p = abstract_bibdoc['hidden_p']

    # Main file row
    # NOTE(review): the HTML fragments in the format strings below appear
    # to have been stripped by the extraction that produced this copy of
    # the file (empty '' targets of '%'); verify against the repository
    # version before relying on the markup.
    out = '' % (even and ' class="even"' or '')
    out += '' % (hidden_p and ' style="color:#99F"' or '')
    if not updated and show_links and not hidden_p:
        out += ''
    out += abstract_bibdoc['get_docname']
    if hidden_p:
        out += ' (hidden)'
    if not updated and show_links and not hidden_p:
        out += ''
    if main_bibdocfile_description:
        out += ' (' + main_bibdocfile_description + ')'
    out += ''

    (description, comment) = get_description_and_comment(abstract_bibdoc['list_latest_files'])
    restriction = abstract_bibdoc['get_status']

    # Revise link: shown only when the doctype is revisable (or the '*'
    # wildcard allows all) and the file is not a protected hidden file.
    out += ''
    if main_bibdocfile.get_type() in can_revise_doctypes or \
           '*' in can_revise_doctypes and not (hidden_p and protect_hidden_files):
        out += '[%(revise)s]' % \
               {'display_revise_panel': javascript_display_revise_panel(
                   action='revise',
                   target=abstract_bibdoc['get_docname'],
                   show_doctypes=False,
                   show_keep_previous_versions=(main_bibdocfile.get_type() in can_keep_doctypes) or '*' in can_keep_doctypes,
                   show_rename=(main_bibdocfile.get_type() in can_rename_doctypes) or '*' in can_rename_doctypes,
                   show_description=(main_bibdocfile.get_type() in can_describe_doctypes) or '*' in can_describe_doctypes,
                   show_comment=(main_bibdocfile.get_type() in can_comment_doctypes) or '*' in can_comment_doctypes,
                   bibdocname=abstract_bibdoc['get_docname'],
                   description=description,
                   comment=comment,
                   show_restrictions=(main_bibdocfile.get_type() in can_restrict_doctypes) or '*' in can_restrict_doctypes,
                   restriction=restriction,
                   doctypes=doctypes_list),
                'revise': _("revise")
               }

    # Delete link: same gating logic as the revise link above.
    if main_bibdocfile.get_type() in can_delete_doctypes or \
           '*' in can_delete_doctypes and not (hidden_p and protect_hidden_files):
        out += '''[%(delete)s] ''' % {'bibdocname': abstract_bibdoc['get_docname'].replace("'", "\\'").replace('"', '"'),
                                      'delete': _("delete"),
                                      'form_url_params': form_url_params or ''}
    out += ''''''

    # Format row: lists every existing format of this bibdoc.
    out += ''' ''' % (even and ' class="even"' or '',
                      hidden_p and ' style="color:#999"' or '',
                      CFG_SITE_URL)
    for bibdocfile in abstract_bibdoc['list_latest_files']:
        if not updated and show_links and not hidden_p:
            out += ''
        out += bibdocfile.get_format().strip('.')
        if not updated and show_links and not hidden_p:
            out += ''
        out += ' '

    # Add-format link: opens the revise panel in 'addFormat' mode with
    # all attribute fields hidden.
    out += ''
    if main_bibdocfile.get_type() in can_add_format_to_doctypes or \
           '*' in can_add_format_to_doctypes and not (hidden_p and protect_hidden_files):
        out += '[%(add_format)s]' % \
               {'display_revise_panel':javascript_display_revise_panel(
                   action='addFormat',
                   target=abstract_bibdoc['get_docname'],
                   show_doctypes=False,
                   show_keep_previous_versions=False,
                   show_rename=False,
                   show_description=False,
                   show_comment=False,
                   bibdocname='',
                   description='',
                   comment='',
                   show_restrictions=False,
                   restriction=restriction,
                   doctypes=doctypes_list),
                'add_format':_("add format")}
    out += ''
    return out

def build_updated_files_list(bibdocs, actions, recid,
                             display_hidden_files=False):
    """
    Parses the list of BibDocs and builds an updated version to
    reflect the changes performed by the user of the file

    It is necessary to abstract the BibDocs since user wants to
    perform action on the files that are committed only at the end of
    the session.

    @param bibdocs: the original list of bibdocs on which we want to
                    build a new updated list
    @param actions: the list of actions performed by the user on the
                    files, and that we want to consider to build an
                    updated file list
    @param recid: the record ID to which the files belong
    @param display_hidden_files: if bibdoc containing bibdocfiles
                                 flagged as 'HIDDEN' should be
                                 displayed or not.
    @type display_hidden_files: boolean
    """
    abstract_bibdocs = {}
    i = 0
    # First pass: mirror the real BibDocs into 'abstract' dictionaries.
    for bibdoc in bibdocs:
        hidden_p = True in [bibdocfile.hidden_p() for bibdocfile in
                            bibdoc.list_latest_files()]
        if CFG_CERN_SITE:
            hidden_p = False # Temporary workaround. See Ticket #846
        if not display_hidden_files and hidden_p:
            # Do not consider hidden files
            continue
        i += 1
        status = bibdoc.get_status()
        if status == "DELETED":
            status = ''
        abstract_bibdocs[bibdoc.get_docname()] = \
            {'list_latest_files': bibdoc.list_latest_files(),
             'get_docname': bibdoc.get_docname(),
             'updated': False,
             'get_type': bibdoc.get_type(),
             'get_status': status,
             'order': i,
             'hidden_p': hidden_p}

    # Second pass: replay the user's logged actions on top of the mirror.
    for action, bibdoc_name, file_path, rename, description, \
            comment, doctype, keep_previous_versions, \
            file_restriction in actions:
        dirname, filename, fileformat = decompose_file(file_path)
        i += 1
        if action in ["add", "revise"] and \
               os.path.exists(file_path):
            checksum = calculate_md5(file_path)
            order = i
            if action == "revise" and \
                   abstract_bibdocs.has_key(bibdoc_name):
                # Keep previous values
                order = abstract_bibdocs[bibdoc_name]['order']
                doctype = abstract_bibdocs[bibdoc_name]['get_type']
            if bibdoc_name.strip() == '' and rename.strip() == '':
                # Derive a docname from the uploaded filename (extension dropped)
                bibdoc_name = os.path.extsep.join(filename.split(os.path.extsep)[:-1])
            elif rename.strip() != '' and \
                     abstract_bibdocs.has_key(bibdoc_name):
                # Keep previous position
                del abstract_bibdocs[bibdoc_name]
            # First instantiate a fake BibDocMoreInfo object, without any side effect
            more_info = BibDocMoreInfo(1, cPickle.dumps({}))
            if description is not None:
                more_info.more_info['descriptions'] = {1: {fileformat:description}}
if comment is not None: more_info.more_info['comments'] = {1: {fileformat:comment}} abstract_bibdocs[(rename or bibdoc_name)] = \ {'list_latest_files': [BibDocFile(file_path, doctype, version=1, name=(rename or bibdoc_name), format=fileformat, recid=int(recid), docid=-1, status=file_restriction, checksum=checksum, more_info=more_info)], 'get_docname': rename or bibdoc_name, 'get_type': doctype, 'updated': True, 'get_status': file_restriction, 'order': order, 'hidden_p': False} abstract_bibdocs[(rename or bibdoc_name)]['updated'] = True elif action == "revise" and not file_path: # revision of attributes of a file (description, name, # comment or restriction) but no new file. abstract_bibdocs[bibdoc_name]['get_docname'] = rename or bibdoc_name abstract_bibdocs[bibdoc_name]['get_status'] = file_restriction set_description_and_comment(abstract_bibdocs[bibdoc_name]['list_latest_files'], description, comment) abstract_bibdocs[bibdoc_name]['updated'] = True elif action == "delete": if abstract_bibdocs.has_key(bibdoc_name): del abstract_bibdocs[bibdoc_name] elif action == "addFormat" and \ os.path.exists(file_path): checksum = calculate_md5(file_path) # Preserve type and status doctype = abstract_bibdocs[bibdoc_name]['get_type'] file_restriction = abstract_bibdocs[bibdoc_name]['get_status'] # First instantiate a fake BibDocMoreInfo object, without any side effect more_info = BibDocMoreInfo(1, cPickle.dumps({})) if description is not None: more_info.more_info['descriptions'] = {1: {fileformat:description}} if comment is not None: more_info.more_info['comments'] = {1: {fileformat:comment}} abstract_bibdocs[bibdoc_name]['list_latest_files'].append(\ BibDocFile(file_path, doctype, version=1, name=(rename or bibdoc_name), format=fileformat, recid=int(recid), docid=-1, status='', checksum=checksum, more_info=more_info)) abstract_bibdocs[bibdoc_name]['updated'] = True return abstract_bibdocs.values() def _read_file_revision_interface_configuration_from_disk(working_dir): """ Read 
the configuration of the file revision interface from disk @param working_dir: the path to the working directory where we can find the configuration file """ input_file = open(os.path.join(working_dir, 'upload_interface.config'), 'rb') configuration = cPickle.load(input_file) input_file.close() return configuration def _write_file_revision_interface_configuration_to_disk(working_dir, parameters): """ Write the configuration of the file revision interface to disk @param working_dir: the path to the working directory where we should write the configuration. @param parameters: the parameters to write to disk """ output = open(os.path.join(working_dir, 'upload_interface.config'), 'wb') cPickle.dump(parameters, output) output.close() def log_action(log_dir, action, bibdoc_name, file_path, rename, description, comment, doctype, keep_previous_versions, file_restriction): """ Logs a new action performed by user on a BibDoc file. The log file record one action per line, each column being split by '<--->' ('---' is escaped from values 'rename', 'description', 'comment' and 'bibdoc_name'). The original request for this format was motivated by the need to have it easily readable by other scripts. Not sure it still makes sense nowadays... Newlines are also reserved, and are escaped from the input values (necessary for the 'comment' field, which is the only one allowing newlines from the browser) Each line starts with the time of the action in the following format: '2008-06-20 08:02:04 --> ' @param log_dir: directory where to save the log (ie. 
working_dir) @param action: the performed action (one of 'revise', 'delete', 'add', 'addFormat') @param bibdoc_name: the name of the bibdoc on which the change is applied @param file_path: the path to the file that is going to be integrated as bibdoc, if any (should be"" in case of action="delete", or action="revise" when revising only attributes of a file) @param rename: the name used to display the bibdoc, instead of the filename (can be None for no renaming) @param description: a description associated with the file @param comment: a comment associated with the file @param doctype: the category in which the file is going to be integrated @param keep_previous_versions: if the previous versions of this file are to be hidden (0) or not (1) @param file_restriction: the restriction applied to the file. Empty string if no restriction """ log_file = os.path.join(log_dir, 'bibdocactions.log') try: file_desc = open(log_file, "a+") # We must escape new lines from comments in some way: comment = str(comment).replace('\\', '\\\\').replace('\r\n', '\\n\\r') msg = action + '<--->' + \ bibdoc_name.replace('---', '___') + '<--->' + \ file_path + '<--->' + \ str(rename).replace('---', '___') + '<--->' + \ str(description).replace('---', '___') + '<--->' + \ comment.replace('---', '___') + '<--->' + \ doctype + '<--->' + \ str(int(keep_previous_versions)) + '<--->' + \ file_restriction + '\n' file_desc.write("%s --> %s" %(time.strftime("%Y-%m-%d %H:%M:%S"), msg)) file_desc.close() except Exception ,e: raise e def read_actions_log(log_dir): """ Reads the logs of action to be performed on files See log_action(..) for more information about the structure of the log file. 
@param log_dir: the path to the directory from which to read the log file @type log_dir: string """ actions = [] log_file = os.path.join(log_dir, 'bibdocactions.log') try: file_desc = open(log_file, "r") for line in file_desc.readlines(): (timestamp, action) = line.split(' --> ', 1) try: (action, bibdoc_name, file_path, rename, description, comment, doctype, keep_previous_versions, file_restriction) = action.rstrip('\n').split('<--->') except ValueError, e: # Malformed action log pass # Clean newline-escaped comment: comment = comment.replace('\\n\\r', '\r\n').replace('\\\\', '\\') # Perform some checking if action not in CFG_ALLOWED_ACTIONS: # Malformed action log pass try: keep_previous_versions = int(keep_previous_versions) except: # Malformed action log keep_previous_versions = 1 pass actions.append((action, bibdoc_name, file_path, rename, \ description, comment, doctype, keep_previous_versions, file_restriction)) file_desc.close() except: pass return actions def javascript_display_revise_panel(action, target, show_doctypes, show_keep_previous_versions, show_rename, show_description, show_comment, bibdocname, description, comment, show_restrictions, restriction, doctypes): """ Returns a correctly encoded call to the javascript function to display the revision panel. 
""" def escape_js_string_param(input): "Escape string parameter to be used in Javascript function" return input.replace('\\', '\\\\').replace('\r', '\\r').replace('\n', '\\n').replace("'", "\\'").replace('"', '"') return '''display_revise_panel(this, '%(action)s', '%(target)s', %(showDoctypes)s, %(showKeepPreviousVersions)s, %(showRename)s, %(showDescription)s, %(showComment)s, '%(bibdocname)s', '%(description)s', '%(comment)s', %(showRestrictions)s, '%(restriction)s', '%(doctypes)s')''' % \ {'action': action, 'showDoctypes': show_doctypes and 'true' or 'false', 'target': escape_js_string_param(target), 'bibdocname': escape_js_string_param(bibdocname), 'showRename': show_rename and 'true' or 'false', 'showKeepPreviousVersions': show_keep_previous_versions and 'true' or 'false', 'showComment': show_comment and 'true' or 'false', 'showDescription': show_description and 'true' or 'false', 'description': description and escape_js_string_param(description) or '', 'comment': comment and escape_js_string_param(comment) or '', 'showRestrictions': show_restrictions and 'true' or 'false', 'restriction': escape_js_string_param(restriction), 'doctypes': escape_js_string_param(doctypes)} def get_uploaded_files_for_docname(log_dir, docname): """ Given a docname, returns the paths to the files uploaded for this revision session. @param log_dir: the path to the directory that should contain the uploaded files. @param docname: the name of the bibdoc for which we want to retrieve files. """ return [file_path for action, bibdoc_name, file_path, rename, \ description, comment, doctype, keep_previous_versions , \ file_restriction in read_actions_log(log_dir) \ if bibdoc_name == docname and os.path.exists(file_path)] def get_bibdoc_for_docname(docname, abstract_bibdocs): """ Given a docname, returns the corresponding bibdoc from the 'abstract' bibdocs. 
Return None if not found @param docname: the name of the bibdoc we want to retrieve @param abstract_bibdocs: the list of bibdocs from which we want to retrieve the bibdoc """ bibdocs = [bibdoc for bibdoc in abstract_bibdocs \ if bibdoc['get_docname'] == docname] if len(bibdocs) > 0: return bibdocs[0] else: return None def get_extensions_for_docname(docname, abstract_bibdocs): """ Returns the list of extensions that exists for given bibdoc name in the given 'abstract' bibdocs. @param docname: the name of the bibdoc for wich we want to retrieve the available extensions @param abstract_bibdocs: the list of bibdocs from which we want to retrieve the bibdoc extensions """ bibdocfiles = [bibdoc['list_latest_files'] for bibdoc \ in abstract_bibdocs \ if bibdoc['get_docname'] == docname] if len(bibdocfiles) > 0: # There should always be at most 1 matching docname, or 0 if # it is a new file return [bibdocfile.get_format() for bibdocfile \ in bibdocfiles[0]] return [] def is_hidden_for_docname(docname, abstract_bibdocs): """ Returns True if the bibdoc with given docname in abstract_bibdocs should be hidden. Also return True if docname cannot be found in abstract_bibdocs. @param docname: the name of the bibdoc for wich we want to check if it is hidden or not @param abstract_bibdocs: the list of bibdocs from which we want to look for the given docname """ bibdocs = [bibdoc for bibdoc in abstract_bibdocs \ if bibdoc['get_docname'] == docname] if len(bibdocs) > 0: return bibdocs[0]['hidden_p'] return True def get_description_and_comment(bibdocfiles): """ Returns the first description and comment as tuple (description, comment) found in the given list of bibdocfile description and/or comment can be None. This function is needed since we do consider that there is one comment/description per bibdoc, and not per bibdocfile as APIs state. @param bibdocfiles: the list of files of a given bibdoc for which we want to extract the description and comment. 
""" description = None comment = None all_descriptions = [bibdocfile.get_description() for bibdocfile \ in bibdocfiles if bibdocfile.get_description() not in ['', None]] if len(all_descriptions) > 0: description = all_descriptions[0] all_comments = [bibdocfile.get_comment() for bibdocfile \ in bibdocfiles if bibdocfile.get_comment() not in ['', None]] if len(all_comments) > 0: comment = all_comments[0] return (description, comment) def set_description_and_comment(abstract_bibdocfiles, description, comment): """ Set the description and comment to the given (abstract) bibdocfiles. description and/or comment can be None. This function is needed since we do consider that there is one comment/description per bibdoc, and not per bibdocfile as APIs state. @param abstract_bibdocfiles: the list of 'abstract' files of a given bibdoc for which we want to set the description and comment. @param description: the new description @param comment: the new comment """ for bibdocfile in abstract_bibdocfiles: bibdocfile.description = description bibdocfile.comment = comment def delete_file(working_dir, file_path): """ Deletes a file at given path from the file. In fact, we just move it to working_dir/files/trash @param working_dir: the path to the working directory @param file_path: the path to the file to delete """ if os.path.exists(file_path): filename = os.path.split(file_path)[1] move_to = os.path.join(working_dir, 'files', 'trash', filename +'_' + str(time.time())) os.renames(file_path, move_to) def wash_form_parameters(form, abstract_bibdocs, can_keep_doctypes, keep_default, can_describe_doctypes, can_comment_doctypes, can_rename_doctypes, can_name_new_files, can_restrict_doctypes, doctypes_to_default_filename, working_dir): """ Washes the (user-defined) form parameters, taking into account the current state of the files and the admin defaults. 
@param form: the form of the function @param abstract_bibdocs: a representation of the current state of the files, as returned by build_updated_file_list(..) @param can_keep_doctypes: the list of doctypes for which we allow users to choose to keep or not the previous versions when revising. @type can_keep_doctypes: list @param keep_default: the admin-defined default for when users cannot choose to keep or not previous version of a revised file @type keep_default: boolean @param can_describe_doctypes: the list of doctypes for which we let users define descriptions. @type can_describe_doctypes: list @param can_comment_doctypes: the list of doctypes for which we let users define comments. @type can_comment_doctypes: list @param can_rename_doctypes: the list of doctypes for which we let users rename bibdoc when revising. @type can_rename_doctypes: list @param can_name_new_files: if we let users choose a name when adding new files. @type can_name_new_files: boolean @param can_restrict_doctypes: the list of doctypes for which we let users define access restrictions. @type can_restrict_doctypes: list @param doctypes_to_default_filename: mapping from doctype to admin-chosen name for uploaded file. @type doctypes_to_default_filename: dict @param working_dir: the path to the current working directory @type working_dir: string @return: tuple (file_action, file_target, file_target_doctype, keep_previous_files, file_description, file_comment, file_rename, file_doctype, file_restriction) where:: file_action: *str* the performed action ('add', 'revise','addFormat' or 'delete') file_target: *str* the bibdocname of the file on which the action is performed (empty string when file_action=='add') file_target_doctype: *str* the doctype of the file we will work on. Eg: ('main', 'additional'). Empty string with file_action=='add'. keep_previous_files: *bool* if we keep the previous version of the file or not. Only useful when revising files. 
file_description: *str* the user-defined description to apply to the file. Empty string when no description defined or when not applicable file_comment: *str* the user-defined comment to apply to the file. Empty string when no comment defined or when not applicable file_rename: *str* the new name chosen by user for the bibdoc. Empty string when not defined or when not applicable. file_doctype: *str* the user-chosen doctype for the bibdoc when file_action=='add', or the current doctype of the file_target in other cases (doctype must be preserved). file_restriction: *str* the user-selected restriction for the file. Emptry string if not defined or when not applicable. file_name: *str* the original name of the uploaded file. None if no file uploaded file_path: *str* the full path to the file @rtype: tuple(string, string, string, boolean, string, string, string, string, string, string, string) """ # Action performed ... if form.has_key("fileAction") and \ form['fileAction'] in CFG_ALLOWED_ACTIONS: file_action = str(form['fileAction']) # "add", "revise", # "addFormat" or "delete" else: file_action = "" # ... on file ... if form.has_key("fileTarget"): file_target = str(form['fileTarget']) # contains bibdocname # Also remember its doctype to make sure we do valid actions # on it corresponding_bibdoc = get_bibdoc_for_docname(file_target, abstract_bibdocs) if corresponding_bibdoc is not None: file_target_doctype = corresponding_bibdoc['get_type'] else: file_target_doctype = "" else: file_target = "" file_target_doctype = "" # ... with doctype? # Only useful when adding file: otherwise fileTarget doctype is # preserved file_doctype = file_target_doctype if form.has_key("fileDoctype") and \ file_action == 'add': file_doctype = str(form['fileDoctype']) # ... keeping previous version? ... if file_target_doctype != '' and \ not form.has_key("keepPreviousFiles"): # no corresponding key. 
Two possibilities: if file_target_doctype in can_keep_doctypes or \ '*' in can_keep_doctypes: # User decided no to keep keep_previous_files = 0 else: # No choice for user. Use default admin has chosen keep_previous_files = keep_default else: # Checkbox seems to be checked ... if file_target_doctype in can_keep_doctypes or \ '*' in can_keep_doctypes: # ...and this is allowed keep_previous_files = 1 else: # ...but this is not allowed keep_previous_files = keep_default # ... and decription? ... if form.has_key("description") and \ (((file_action == 'revise' and \ (file_target_doctype in can_describe_doctypes)) or \ (file_action == 'add' and \ (file_doctype in can_describe_doctypes))) \ or '*' in can_describe_doctypes): file_description = str(form['description']) else: file_description = '' # ... and comment? ... if form.has_key("comment") and \ (((file_action == 'revise' and \ (file_target_doctype in can_comment_doctypes)) or \ (file_action == 'add' and \ (file_doctype in can_comment_doctypes))) \ or '*' in can_comment_doctypes): file_comment = str(form['comment']) else: file_comment = '' # ... and rename to ? ... if form.has_key("rename") and \ ((file_action == "revise" and \ ((file_target_doctype in can_rename_doctypes) or \ '*' in can_rename_doctypes)) or \ (file_action == "add" and \ can_name_new_files)): file_rename = str(form['rename']) # contains new bibdocname if applicable elif file_action == "add" and \ doctypes_to_default_filename.has_key(file_doctype): # Admin-chosen name. file_rename = doctypes_to_default_filename[file_doctype] if file_rename.lower().startswith('file:'): # We will define name at a later stage, i.e. when # submitting the file with bibdocfile. 
The name will be # chosen by reading content of a file in curdir file_rename = '' else: # Ensure name is unique, by appending a suffix file_rename = doctypes_to_default_filename[file_doctype] file_counter = 2 while get_bibdoc_for_docname(file_rename, abstract_bibdocs): if file_counter == 2: file_rename += '-2' else: file_rename = file_rename[:-len(str(file_counter))] + \ str(file_counter) file_counter += 1 else: file_rename = '' # ... and file restriction ? ... file_restriction = '' if form.has_key("fileRestriction"): # We cannot clean that value as it could be a restriction # declared in another submission. We keep this value. file_restriction = str(form['fileRestriction']) # ... and the file itself ? ... if form.has_key('myfile') and \ hasattr(form['myfile'], "filename") and \ form['myfile'].filename: dir_to_open = os.path.join(working_dir, 'files', 'myfile') if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: pass # Shall we continue? if os.path.exists(dir_to_open): form_field = form['myfile'] file_name = form_field.filename form_file = form_field.file ## Before saving the file to disk, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): file_name = os.path.basename(file_name.split('\\')[-1]) file_name = file_name.strip() if file_name != "": # This may be dangerous if the file size is bigger than # the available memory file_path = os.path.join(dir_to_open, file_name) if not os.path.exists(file_path): # If file already exists, it means that it was # handled by WebSubmit fp = file(file_path, "wb") chunk = form_file.read(10240) while chunk: fp.write(chunk) chunk = form_file.read(10240) fp.close() fp = open(os.path.join(working_dir, "lastuploadedfile"), "w") fp.write(file_name) fp.close() fp = open(os.path.join(working_dir, 'myfile'), "w") fp.write(file_name) fp.close() else: file_name = None file_path = None return (file_action, file_target, file_target_doctype, keep_previous_files, file_description, file_comment, file_rename, file_doctype, file_restriction, file_name, file_path) def move_uploaded_files_to_storage(working_dir, recid, icon_sizes, create_icon_doctypes, force_file_revision): """ Apply the modifications on files (add/remove/revise etc.) made by users with one of the compatible interfaces (WebSubmit function `Create_Upload_Files_Interface.py'; WebSubmit element or WebSubmit File management interface using function `create_file_upload_interface'). This function needs a "working directory" (working_dir) that contains a bibdocactions.log file with the list of actions to perform. 
@param working_dir: a path to the working directory containing actions to perform and files to attach @type working_dir: string @param recid: the recid to modify @type recid: int @param icon_sizes: the sizes of icons to create, as understood by the websubmit icon creation tool @type icon_sizes: list(string) @param create_icon_doctypes: a list of doctype for which we want to create icons @type create_icon_doctypes: list(string) @param force_file_revision: when revising attributes of a file (comment, description) without uploading a new file, force a revision of the current version (so that old comment, description, etc. is kept or not) @type force_file_revision: bool """ # We need to remember of some actions that cannot be performed, # because files have been deleted or moved after a renaming. # Those pending action must be applied when revising the bibdoc # with a file that exists (that means that the bibdoc has not been # deleted nor renamed by a later action) pending_bibdocs = {} newly_added_bibdocs = [] # Does not consider new formats/revisions performed_actions = read_actions_log(working_dir) for action, bibdoc_name, file_path, rename, description, \ comment, doctype, keep_previous_versions, \ file_restriction in performed_actions: # FIXME: get this out of the loop once changes to bibrecdocs # are immediately visible. 
For the moment, reload the # structure from scratch at each step bibrecdocs = BibRecDocs(recid) if action == 'add': new_bibdoc = \ add(file_path, bibdoc_name, rename, doctype, description, comment, file_restriction, recid, working_dir, icon_sizes, create_icon_doctypes, pending_bibdocs, bibrecdocs) if new_bibdoc: newly_added_bibdocs.append(new_bibdoc) elif action == 'addFormat': add_format(file_path, bibdoc_name, recid, doctype, working_dir, icon_sizes, create_icon_doctypes, pending_bibdocs, bibrecdocs) elif action == 'revise': new_bibdoc = \ revise(file_path, bibdoc_name, rename, doctype, description, comment, file_restriction, icon_sizes, create_icon_doctypes, keep_previous_versions, recid, working_dir, pending_bibdocs, bibrecdocs, force_file_revision) if new_bibdoc: newly_added_bibdocs.append(new_bibdoc) elif action == 'delete': delete(bibdoc_name, recid, working_dir, pending_bibdocs, bibrecdocs) # Finally rename bibdocs that should be named according to a file in # curdir (eg. naming according to report number). Only consider # file that have just been added. parameters = _read_file_revision_interface_configuration_from_disk(working_dir) new_names = [] doctypes_to_default_filename = parameters[22] for bibdoc_to_rename in newly_added_bibdocs: bibdoc_to_rename_doctype = bibdoc_to_rename.doctype rename_to = doctypes_to_default_filename.get(bibdoc_to_rename_doctype, '') if rename_to.startswith('file:'): # This BibDoc must be renamed. 
Look for name in working dir name_at_filepath = os.path.join(working_dir, rename_to[5:]) if os.path.exists(name_at_filepath) and \ os.path.abspath(name_at_filepath).startswith(working_dir): try: rename = file(name_at_filepath).read() except: register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'could not read file %s in curdir to rename bibdoc' % \ (name_at_filepath,), alert_admin=True) if rename: file_counter = 2 new_filename = rename while bibrecdocs.has_docname_p(new_filename) or (new_filename in new_names): new_filename = rename + '_%i' % file_counter file_counter += 1 bibdoc_to_rename.change_name(new_filename) new_names.append(new_filename) # keep track of name, or we have to reload bibrecdoc... _do_log(working_dir, 'Renamed ' + bibdoc_to_rename.get_docname()) # Delete the HB BibFormat cache in the DB, so that the fulltext # links do not point to possible dead files run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (recid,)) # Update the MARC cli_fix_marc(None, [recid], interactive=False) def add(file_path, bibdoc_name, rename, doctype, description, comment, file_restriction, recid, working_dir, icon_sizes, create_icon_doctypes, pending_bibdocs, bibrecdocs): """ Adds the file using bibdocfile CLI Return the bibdoc that has been newly added. 
""" try: if os.path.exists(file_path): # Add file bibdoc = bibrecdocs.add_new_file(file_path, doctype, rename or bibdoc_name, never_fail=True) _do_log(working_dir, 'Added ' + bibdoc.get_docname() + ': ' + \ file_path) # Add icon iconpath = '' has_added_default_icon_subformat_p = False for icon_size in icon_sizes: if doctype in create_icon_doctypes or \ '*' in create_icon_doctypes: iconpath = _create_icon(file_path, icon_size) if iconpath is not None: try: if not has_added_default_icon_subformat_p: bibdoc.add_icon(iconpath) has_added_default_icon_subformat_p = True else: icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '') - bibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) + bibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) _do_log(working_dir, 'Added icon to ' + \ bibdoc.get_docname() + ': ' + iconpath) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Most probably icon already existed. pass # Add description if description: bibdocfiles = bibdoc.list_latest_files() for bibdocfile in bibdocfiles: bibdoc.set_description(description, bibdocfile.get_format()) _do_log(working_dir, 'Described ' + \ bibdoc.get_docname() + ': ' + description) # Add comment if comment: bibdocfiles = bibdoc.list_latest_files() for bibdocfile in bibdocfiles: bibdoc.set_comment(comment, bibdocfile.get_format()) _do_log(working_dir, 'Commented ' + \ bibdoc.get_docname() + ': ' + comment) # Set restriction bibdoc.set_status(file_restriction) _do_log(working_dir, 'Set restriction of ' + \ bibdoc.get_docname() + ': ' + \ file_restriction or '(no restriction)') return bibdoc else: # File has been later renamed or deleted. 
# Remember to add it later if file is found (ie # it was renamed) pending_bibdocs[bibdoc_name] = (doctype, comment, description, []) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Format already existed. How come? We should # have checked this in Create_Upload_Files_Interface.py register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'tried to add already existing file %s ' \ 'with name %s to record %i.' % \ (file_path, bibdoc_name, recid), alert_admin=True) def add_format(file_path, bibdoc_name, recid, doctype, working_dir, icon_sizes, create_icon_doctypes, pending_bibdocs, bibrecdocs): """ Adds a new format to a bibdoc using bibdocfile CLI """ try: if os.path.exists(file_path): # We must retrieve previous description and comment as # adding a file using the APIs reset these values prev_desc, prev_comment = None, None if bibrecdocs.has_docname_p(bibdoc_name): (prev_desc, prev_comment) = \ get_description_and_comment(bibrecdocs.get_bibdoc(bibdoc_name).list_latest_files()) # Add file bibdoc = bibrecdocs.add_new_format(file_path, bibdoc_name, prev_desc, prev_comment) _do_log(working_dir, 'Added new format to ' + \ bibdoc.get_docname() + ': ' + file_path) # Add icons has_added_default_icon_subformat_p = False for icon_size in icon_sizes: iconpath = '' if doctype in create_icon_doctypes or \ '*' in create_icon_doctypes: iconpath = _create_icon(file_path, icon_size) if iconpath is not None: try: if not has_added_default_icon_subformat_p: bibdoc.add_icon(iconpath) has_added_default_icon_subformat_p = True else: # We have already added the "default" icon subformat icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '') - bibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) + bibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) _do_log(working_dir, 'Added icon to ' + \ bibdoc.get_docname() + ': ' + iconpath) - except 
InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Most probably icon already existed. pass else: # File has been later renamed or deleted. # Remember to add it later if file is found if pending_bibdocs.has_key(bibdoc_name): pending_bibdocs[bibdoc_name][3].append(file_path) # else: we previously added a file by mistake. Do # not care, it will be deleted - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Format already existed. How come? We should # have checked this in Create_Upload_Files_Interface.py register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'tried to add already existing format %s ' \ 'named %s in record %i.' % \ (file_path, bibdoc_name, recid), alert_admin=True) def revise(file_path, bibdoc_name, rename, doctype, description, comment, file_restriction, icon_sizes, create_icon_doctypes, keep_previous_versions, recid, working_dir, pending_bibdocs, bibrecdocs, force_file_revision): """ Revises the given bibdoc with a new file. Return the bibdoc that has been newly added. (later: if needed, return as tuple the bibdoc that has been revised, or deleted, etc.) """ added_bibdoc = None try: if os.path.exists(file_path) or not file_path: # Perform pending actions if pending_bibdocs.has_key(bibdoc_name): # We have some pending actions to apply before # going further. if description == '': # Last revision did not include a description. # Use the one of the pending actions description = pending_bibdocs[bibdoc_name][2] if comment == '': # Last revision did not include a comment. 
# Use the one of the pending actions comment = pending_bibdocs[bibdoc_name][1] original_bibdoc_name = pending_bibdocs[bibdoc_name][0] if not bibrecdocs.has_docname_p(original_bibdoc_name) and file_path: # the bibdoc did not originaly exist, so it # must be added first bibdoc = bibrecdocs.add_new_file(file_path, pending_bibdocs[bibdoc_name][0], bibdoc_name, never_fail=True) _do_log(working_dir, 'Added ' + bibdoc.get_docname() + ': ' + \ file_path) added_bibdoc = bibdoc # Set restriction bibdoc.set_status(file_restriction) _do_log(working_dir, 'Set restriction of ' + \ bibdoc.get_docname() + ': ' + \ file_restriction or '(no restriction)') # We must retrieve previous description and comment as # revising a file using the APIs reset these values prev_desc, prev_comment = None, None if bibrecdocs.has_docname_p(bibdoc_name): (prev_desc, prev_comment) = \ get_description_and_comment(bibrecdocs.get_bibdoc(bibdoc_name).list_latest_files()) # Do we have additional formats? for additional_format in pending_bibdocs[bibdoc_name][3]: if os.path.exists(additional_format): bibdoc.add_file_new_format(additional_format, description=bibdoc.get_description(), comment=bibdoc.get_comment()) _do_log(working_dir, 'Added new format to' + \ bibdoc.get_docname() + ': ' + file_path) # All pending modification have been applied, # so delete del pending_bibdocs[bibdoc_name] # We must retrieve previous description and comment as # revising a file using the APIs reset these values prev_desc, prev_comment = None, None if bibrecdocs.has_docname_p(bibdoc_name): (prev_desc, prev_comment) = \ get_description_and_comment(bibrecdocs.get_bibdoc(bibdoc_name).list_latest_files()) if keep_previous_versions and file_path: # Standard procedure, keep previous version bibdoc = bibrecdocs.add_new_version(file_path, bibdoc_name, prev_desc, prev_comment) _do_log(working_dir, 'Revised ' + bibdoc.get_docname() + \ ' with : ' + file_path) elif file_path: # Soft-delete previous versions, and add new file # (we need 
to get the doctype before deleting) if bibrecdocs.has_docname_p(bibdoc_name): # Delete only if bibdoc originally # existed bibrecdocs.delete_bibdoc(bibdoc_name) _do_log(working_dir, 'Deleted ' + bibdoc_name) try: bibdoc = bibrecdocs.add_new_file(file_path, doctype, bibdoc_name, never_fail=True, description=prev_desc, comment=prev_comment) _do_log(working_dir, 'Added ' + bibdoc.get_docname() + ': ' + \ file_path) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: _do_log(working_dir, str(e)) register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'tried to revise a file %s ' \ 'named %s in record %i.' % \ (file_path, bibdoc_name, recid), alert_admin=True) else: # User just wanted to change attribute of the file, # not the file itself bibdoc = bibrecdocs.get_bibdoc(bibdoc_name) (prev_desc, prev_comment) = \ get_description_and_comment(bibdoc.list_latest_files()) if prev_desc is None: prev_desc = "" if prev_comment is None: prev_comment = "" if force_file_revision and \ (description != prev_desc or comment != prev_comment): # FIXME: If we are going to create a new version, # then we should honour the keep_previous_versions # parameter (soft-delete, then add bibdoc, etc) # But it is a bit complex right now... 
# Trick: we revert to current version, which # creates a revision of the BibDoc bibdoc.revert(bibdoc.get_latest_version()) bibdoc = bibrecdocs.get_bibdoc(bibdoc_name) # Rename if rename and rename != bibdoc_name: bibdoc.change_name(rename) _do_log(working_dir, 'renamed ' + bibdoc_name +' to '+ rename) # Add icons if file_path: has_added_default_icon_subformat_p = False for icon_size in icon_sizes: iconpath = '' if doctype in create_icon_doctypes or \ '*' in create_icon_doctypes: iconpath = _create_icon(file_path, icon_size) if iconpath is not None: try: if not has_added_default_icon_subformat_p: bibdoc.add_icon(iconpath) has_added_default_icon_subformat_p = True else: # We have already added the "default" icon subformat icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '') - bibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) + bibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) _do_log(working_dir, 'Added icon to ' + \ bibdoc.get_docname() + ': ' + iconpath) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Most probably icon already existed. pass # Description if description: bibdocfiles = bibdoc.list_latest_files() for bibdocfile in bibdocfiles: bibdoc.set_description(description, bibdocfile.get_format()) _do_log(working_dir, 'Described ' + \ bibdoc.get_docname() + ': ' + description) # Comment if comment: bibdocfiles = bibdoc.list_latest_files() for bibdocfile in bibdocfiles: bibdoc.set_comment(comment, bibdocfile.get_format()) _do_log(working_dir, 'Commented ' + \ bibdoc.get_docname() + ': ' + comment) # Set restriction bibdoc.set_status(file_restriction) _do_log(working_dir, 'Set restriction of ' + \ bibdoc.get_docname() + ': ' + \ file_restriction or '(no restriction)') else: # File has been later renamed or deleted. 
# Remember it if rename and rename != bibdoc_name: pending_bibdocs[rename] = pending_bibdocs[bibdoc_name] - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Format already existed. How come? We should # have checked this in Create_Upload_Files_Interface.py register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'tried to revise a file %s ' \ 'named %s in record %i.' % \ (file_path, bibdoc_name, recid), alert_admin=True) return added_bibdoc def delete(bibdoc_name, recid, working_dir, pending_bibdocs, bibrecdocs): """ Deletes the given bibdoc """ try: if bibrecdocs.has_docname_p(bibdoc_name): bibrecdocs.delete_bibdoc(bibdoc_name) _do_log(working_dir, 'Deleted ' + bibdoc_name) if pending_bibdocs.has_key(bibdoc_name): del pending_bibdocs[bibdoc_name] - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Mmh most probably we deleted two files at the same # second. Sleep 1 second and retry... This might go # away one bibdoc improves its way to delete files try: time.sleep(1) bibrecdocs.delete_bibdoc(bibdoc_name) _do_log(working_dir, 'Deleted ' + bibdoc_name) if pending_bibdocs.has_key(bibdoc_name): del pending_bibdocs[bibdoc_name] - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: _do_log(working_dir, str(e)) _do_log(working_dir, repr(bibrecdocs.list_bibdocs())) register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'tried to delete a file' \ 'named %s in record %i.' % \ (bibdoc_name, recid), alert_admin=True) def _do_log(log_dir, msg): """ Log what we have done, in case something went wrong. Nice to compare with bibdocactions.log Should be removed when the development is over. 
@param log_dir: the path to the working directory @type log_dir: string @param msg: the message to log @type msg: string """ log_file = os.path.join(log_dir, 'performed_actions.log') file_desc = open(log_file, "a+") file_desc.write("%s --> %s\n" %(time.strftime("%Y-%m-%d %H:%M:%S"), msg)) file_desc.close() def _create_icon(file_path, icon_size, format='gif', verbosity=9): """ Creates icon of given file. Returns path to the icon. If creation fails, return None, and register exception (send email to admin). @param file_path: full path to icon @type file_path: string @param icon_size: the scaling information to be used for the creation of the new icon. @type icon_size: int @param verbosity: the verbosity level under which the program is to run; @type verbosity: int """ icon_path = None try: filename = os.path.splitext(os.path.basename(file_path))[0] (icon_dir, icon_name) = create_icon( {'input-file':file_path, 'icon-name': "icon-%s" % filename, 'multipage-icon': False, 'multipage-icon-delay': 0, 'icon-scale': icon_size, 'icon-file-format': format, 'verbosity': verbosity}) icon_path = icon_dir + os.sep + icon_name except InvenioWebSubmitIconCreatorError, e: register_exception(prefix='Icon for file %s could not be created: %s' % \ (file_path, str(e)), alert_admin=False) return icon_path def get_upload_file_interface_javascript(form_url_params): """ Returns the Javascript code necessary to run the upload file interface. """ javascript = ''' -''' +''' % {'CFG_SITE_RECORD': CFG_SITE_RECORD} return javascript def get_upload_file_interface_css(): """ Returns the CSS to embed in the page for the upload file interface. 
""" # The CSS embedded in the page for the revise panel css = ''' ''' % {'CFG_SITE_URL': CFG_SITE_URL} return css # The HTML markup of the revise panel revise_balloon = ''' ''' diff --git a/modules/websubmit/lib/bibdocfile_regression_tests.py b/modules/bibdocfile/lib/bibdocfile_regression_tests.py similarity index 99% rename from modules/websubmit/lib/bibdocfile_regression_tests.py rename to modules/bibdocfile/lib/bibdocfile_regression_tests.py index deec00b05..5d33bbd78 100644 --- a/modules/websubmit/lib/bibdocfile_regression_tests.py +++ b/modules/bibdocfile/lib/bibdocfile_regression_tests.py @@ -1,320 +1,320 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. -## Copyright (C) 2009, 2010, 2011 CERN. +## Copyright (C) 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""BibDocFile Regression Test Suite.""" __revision__ = "$Id$" import unittest from invenio.testutils import make_test_suite, run_test_suite from invenio.bibdocfile import BibRecDocs, check_bibdoc_authorization, bibdocfile_url_p from invenio.dbquery import run_sql from invenio.access_control_config import CFG_WEBACCESS_WARNING_MSGS from invenio.config import \ CFG_SITE_URL, \ CFG_PREFIX, \ - CFG_WEBSUBMIT_FILEDIR, \ + CFG_BIBDOCFILE_FILEDIR, \ CFG_SITE_RECORD from datetime import datetime import time class BibDocFsInfoTest(unittest.TestCase): """Regression tests about the table bibdocfsinfo""" def setUp(self): self.my_bibrecdoc = BibRecDocs(2) self.unique_name = self.my_bibrecdoc.propose_unique_docname('file') self.my_bibdoc = self.my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', docname=self.unique_name) self.my_bibdoc_id = self.my_bibdoc.id def tearDown(self): self.my_bibdoc.expunge() def test_hard_delete(self): """bibdocfile - test correct update of bibdocfsinfo when hard-deleting""" self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id, ))[0][0], 1) self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'", (self.my_bibdoc_id, ))[0][0], True) self.my_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.gif') self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id, ))[0][0], 2) self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'", (self.my_bibdoc_id, ))[0][0], True) self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'", (self.my_bibdoc_id, ))[0][0], False) self.my_bibdoc.delete_file('.gif', 2) self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id, ))[0][0], 1) self.assertEqual(run_sql("SELECT 
last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'", (self.my_bibdoc_id, ))[0][0], True) class BibRecDocsTest(unittest.TestCase): """regression tests about BibRecDocs""" def test_BibRecDocs(self): """bibdocfile - BibRecDocs functions""" my_bibrecdoc = BibRecDocs(2) #add bibdoc my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg') my_bibrecdoc.add_bibdoc(doctype='Main', docname='file', never_fail=False) self.assertEqual(len(my_bibrecdoc.list_bibdocs()), 3) my_added_bibdoc = my_bibrecdoc.get_bibdoc('file') #add bibdocfile in empty bibdoc my_added_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.gif', \ description= 'added in empty bibdoc', comment=None, format=None, flags=['PERFORM_HIDE_PREVIOUS']) #propose unique docname self.assertEqual(my_bibrecdoc.propose_unique_docname('file'), 'file_2') #has docname self.assertEqual(my_bibrecdoc.has_docname_p('file'), True) #merge 2 bibdocs my_bibrecdoc.merge_bibdocs('img_test', 'file') self.assertEqual(len(my_bibrecdoc.get_bibdoc("img_test").list_all_files()), 2) #check file exists self.assertEqual(my_bibrecdoc.check_file_exists(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', '.jpg'), True) #get bibdoc names self.assertEqual(my_bibrecdoc.get_bibdoc_names('Main')[0], '0104007_02') self.assertEqual(my_bibrecdoc.get_bibdoc_names('Main')[1],'img_test') #get total size self.assertEqual(my_bibrecdoc.get_total_size(), 1647591) #get total size latest version self.assertEqual(my_bibrecdoc.get_total_size_latest_version(), 1647591) #display value = my_bibrecdoc.display(docname='img_test', version='', doctype='', ln='en', verbose=0, display_hidden=True) self.assert_("Main" in value) #get xml 8564 value = my_bibrecdoc.get_xml_8564() self.assert_('/'+ CFG_SITE_RECORD +'/2/files/img_test.jpg' in value) #check duplicate docnames self.assertEqual(my_bibrecdoc.check_duplicate_docnames(), True) def tearDown(self): 
my_bibrecdoc = BibRecDocs(2) #delete my_bibrecdoc.delete_bibdoc('img_test') my_bibrecdoc.delete_bibdoc('file') class BibDocsTest(unittest.TestCase): """regression tests about BibDocs""" def test_BibDocs(self): """bibdocfile - BibDocs functions""" #add file my_bibrecdoc = BibRecDocs(2) timestamp1 = datetime(*(time.strptime("2011-10-09 08:07:06", "%Y-%m-%d %H:%M:%S")[:6])) my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg', modification_date=timestamp1) my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test") value = my_bibrecdoc.list_bibdocs() self.assertEqual(len(value), 2) #get total file (bibdoc) self.assertEqual(my_new_bibdoc.get_total_size(), 91750) #get recid self.assertEqual(my_new_bibdoc.get_recid(), 2) #change name my_new_bibdoc.change_name('new_name') #get docname self.assertEqual(my_new_bibdoc.get_docname(), 'new_name') #get type self.assertEqual(my_new_bibdoc.get_type(), 'Main') #get id self.assert_(my_new_bibdoc.get_id() > 80) #set status my_new_bibdoc.set_status('new status') #get status self.assertEqual(my_new_bibdoc.get_status(), 'new status') #get base directory - self.assert_(my_new_bibdoc.get_base_dir().startswith(CFG_WEBSUBMIT_FILEDIR)) + self.assert_(my_new_bibdoc.get_base_dir().startswith(CFG_BIBDOCFILE_FILEDIR)) #get file number self.assertEqual(my_new_bibdoc.get_file_number(), 1) #add file new version timestamp2 = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6])) my_new_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', description= 'the new version', comment=None, format=None, flags=["PERFORM_HIDE_PREVIOUS"], modification_date=timestamp2) self.assertEqual(my_new_bibdoc.list_versions(), [1, 2]) #revert timestamp3 = datetime.now() time.sleep(2) # so we can see a difference between now() and the time of the revert my_new_bibdoc.revert(1) self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3]) 
self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3), 'test add new file') #get total size latest version self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750) #get latest version self.assertEqual(my_new_bibdoc.get_latest_version(), 3) #list latest files self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1) self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3) #list version files self.assertEqual(len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1) #display value = my_new_bibdoc.display(version='', ln='en', display_hidden=True) self.assert_('>test add new file<' in value) #format already exist self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True) #get file self.assertEqual(my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1) #set description my_new_bibdoc.set_description('new description', '.jpg', version=1) #get description self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1), 'new description') #set comment my_new_bibdoc.set_description('new comment', '.jpg', version=1) #get comment self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1), 'new comment') #get history assert len(my_new_bibdoc.get_history()) > 0 #check modification date self.assertEqual(my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1) self.assertEqual(my_new_bibdoc.get_file('.jpg', version=2).md, timestamp2) assert my_new_bibdoc.get_file('.jpg', version=3).md > timestamp3 #delete file my_new_bibdoc.delete_file('.jpg', 2) #list all files self.assertEqual(len(my_new_bibdoc.list_all_files()), 2) #delete file my_new_bibdoc.delete_file('.jpg', 3) #add new format timestamp4 = datetime(*(time.strptime("2012-11-10 09:08:07", "%Y-%m-%d %H:%M:%S")[:6])) my_new_bibdoc.add_file_new_format(CFG_PREFIX + '/lib/webtest/invenio/test.gif', version=None, description=None, comment=None, format=None, modification_date=timestamp4) self.assertEqual(len(my_new_bibdoc.list_all_files()), 2) #check 
modification time self.assertEqual(my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1) self.assertEqual(my_new_bibdoc.get_file('.gif', version=1).md, timestamp4) #delete file my_new_bibdoc.delete_file('.jpg', 1) #delete file my_new_bibdoc.delete_file('.gif', 1) #empty bibdoc self.assertEqual(my_new_bibdoc.empty_p(), True) #hidden? self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False) #hide my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1) #hidden? self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True) #add and get icon my_new_bibdoc.add_icon( CFG_PREFIX + '/lib/webtest/invenio/icon-test.gif', modification_date=timestamp4) value = my_bibrecdoc.list_bibdocs()[1] self.assertEqual(value.get_icon(), my_new_bibdoc.get_icon()) #check modification time self.assertEqual(my_new_bibdoc.get_icon().md, timestamp4) #delete icon my_new_bibdoc.delete_icon() #get icon self.assertEqual(my_new_bibdoc.get_icon(), None) #delete my_new_bibdoc.delete() self.assertEqual(my_new_bibdoc.deleted_p(), True) #undelete my_new_bibdoc.undelete(previous_status='') #expunging my_new_bibdoc.expunge() my_bibrecdoc.build_bibdoc_list() self.failIf('new_name' in my_bibrecdoc.get_bibdoc_names()) self.failUnless(my_bibrecdoc.get_bibdoc_names()) def tearDown(self): my_bibrecdoc = BibRecDocs(2) #delete my_bibrecdoc.delete_bibdoc('img_test') my_bibrecdoc.delete_bibdoc('new_name') class BibDocFilesTest(unittest.TestCase): """regression tests about BibDocFiles""" def test_BibDocFiles(self): """bibdocfile - BibDocFile functions """ #add bibdoc my_bibrecdoc = BibRecDocs(2) timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6])) my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg', modification_date=timestamp) my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test") my_new_bibdocfile = my_new_bibdoc.list_all_files()[0] #get url self.assertEqual(my_new_bibdocfile.get_url(), 
CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD) #get type self.assertEqual(my_new_bibdocfile.get_type(), 'Main') #get path - self.assert_(my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR)) + self.assert_(my_new_bibdocfile.get_path().startswith(CFG_BIBDOCFILE_FILEDIR)) self.assert_(my_new_bibdocfile.get_path().endswith('/img_test.jpg;1')) #get bibdocid self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id()) #get name self.assertEqual(my_new_bibdocfile.get_name() , 'img_test') #get full name self.assertEqual(my_new_bibdocfile.get_full_name() , 'img_test.jpg') #get full path - self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_WEBSUBMIT_FILEDIR)) + self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_BIBDOCFILE_FILEDIR)) self.assert_(my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1')) #get format self.assertEqual(my_new_bibdocfile.get_format(), '.jpg') #get version self.assertEqual(my_new_bibdocfile.get_version(), 1) #get description self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description('.jpg', version=1)) #get comment self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment('.jpg', version=1)) #get recid self.assertEqual(my_new_bibdocfile.get_recid(), 2) #get status self.assertEqual(my_new_bibdocfile.get_status(), '') #get size self.assertEqual(my_new_bibdocfile.get_size(), 91750) #get checksum self.assertEqual(my_new_bibdocfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76') #check self.assertEqual(my_new_bibdocfile.check(), True) #display value = my_new_bibdocfile.display(ln='en') assert 'files/img_test.jpg?version=1">' in value #hidden? 
self.assertEqual(my_new_bibdocfile.hidden_p(), False) #check modification date self.assertEqual(my_new_bibdocfile.md, timestamp) #delete my_new_bibdoc.delete() self.assertEqual(my_new_bibdoc.deleted_p(), True) class CheckBibDocAuthorizationTest(unittest.TestCase): """Regression tests for check_bibdoc_authorization function.""" def test_check_bibdoc_authorization(self): """bibdocfile - check_bibdoc_authorization function""" from invenio.webuser import collect_user_info, get_uid_from_email jekyll = collect_user_info(get_uid_from_email('jekyll@cds.cern.ch')) self.assertEqual(check_bibdoc_authorization(jekyll, 'role:thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'role: thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'role: thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'Role: thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'email: jekyll@cds.cern.ch'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'email: jekyll@cds.cern.ch'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) juliet = collect_user_info(get_uid_from_email('juliet.capulet@cds.cern.ch')) self.assertEqual(check_bibdoc_authorization(juliet, 'restricted_picture'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(juliet, 'status: restricted_picture'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertNotEqual(check_bibdoc_authorization(juliet, 'restricted_video')[0], 0) self.assertNotEqual(check_bibdoc_authorization(juliet, 'status: restricted_video')[0], 0) class BibDocFileURLTest(unittest.TestCase): """Regression tests for bibdocfile_url_p function.""" def test_bibdocfile_url_p(self): self.failUnless(bibdocfile_url_p(CFG_SITE_URL + '/%s/98/files/9709037.pdf' % CFG_SITE_RECORD)) self.failUnless(bibdocfile_url_p(CFG_SITE_URL + 
'/%s/098/files/9709037.pdf' % CFG_SITE_RECORD)) TEST_SUITE = make_test_suite(BibRecDocsTest, BibDocsTest, BibDocFilesTest, CheckBibDocAuthorizationTest, BibDocFileURLTest, BibDocFsInfoTest) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/bibdocfile/lib/bibdocfile_templates.py b/modules/bibdocfile/lib/bibdocfile_templates.py new file mode 100644 index 000000000..66221d4e0 --- /dev/null +++ b/modules/bibdocfile/lib/bibdocfile_templates.py @@ -0,0 +1,238 @@ +## This file is part of Invenio. +## Copyright (C) 2012 CERN. +## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
+ +import urllib +import cgi + +from invenio.config import CFG_SITE_URL, \ + CFG_SITE_LANG, CFG_SITE_RECORD, CFG_INSPIRE_SITE +from invenio.messages import gettext_set_language +from invenio.dateutils import convert_datestruct_to_dategui +from invenio.urlutils import create_html_link + +class Template: + + # Parameters allowed in the web interface for fetching files + files_default_urlargd = { + 'version': (str, ""), # version "" means "latest" + 'docname': (str, ""), # the docname (optional) + 'format' : (str, ""), # the format + 'verbose' : (int, 0), # the verbosity + 'subformat': (str, ""), # the subformat + 'download': (int, 0), # download as attachment + } + + def tmpl_filelist(self, ln, filelist='', recid='', docname='', version=''): + """ + Displays the file list for a record. + + Parameters: + + - 'ln' *string* - The language to display the interface in + + - 'recid' *int* - The record id + + - 'docname' *string* - The document name + + - 'version' *int* - The version of the document + + - 'filelist' *string* - The HTML string of the filelist (produced by the BibDoc classes) + """ + + # load the right message language + _ = gettext_set_language(ln) + + title = _("record") + ' #' + '%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, recid) + if docname != "": + title += ' ' + _("document") + ' #' + str(docname) + if version != "": + title += ' ' + _("version") + ' #' + str(version) + + out = """
+ + %s +
+ """ % (filelist) + + return out + + def tmpl_bibrecdoc_filelist(self, ln, types, verbose_files=''): + """ + Displays the file list for a record. + + Parameters: + + - 'ln' *string* - The language to display the interface in + + - 'types' *array* - The different types to display, each record in the format: + + - 'name' *string* - The name of the format + + - 'content' *array of string* - The HTML code produced by tmpl_bibdoc_filelist, for the right files + + - 'verbose_files' - A string representing in a verbose way the + file information. + """ + + # load the right message language + _ = gettext_set_language(ln) + + out = "" + for mytype in types: + if mytype['name']: + if not (CFG_INSPIRE_SITE and mytype['name'] == 'INSPIRE-PUBLIC'): + out += "%s %s:" % (mytype['name'], _("file(s)")) + out += "
    " + for content in mytype['content']: + out += content + out += "
" + if verbose_files: + out += "
%s
" % verbose_files + return out + + def tmpl_bibdoc_filelist(self, ln, versions=None, imageurl='', recid='', docname='', status=''): + """ + Displays the file list for a record. + + Parameters: + + - 'ln' *string* - The language to display the interface in + + - 'versions' *array* - The different versions to display, each record in the format: + + - 'version' *string* - The version + + - 'content' *string* - The HTML code produced by tmpl_bibdocfile_filelist, for the right file + + - 'previous' *bool* - If the file has previous versions + + - 'imageurl' *string* - The URL to the file image + + - 'recid' *int* - The record id + + - 'docname' *string* - The name of the document + + - 'status' *string* - The status of a document + """ + if versions is None: + versions = [] + + # load the right message language + _ = gettext_set_language(ln) + + out = """ + %(restriction_label)s + + + """ % { + 'imageurl' : imageurl, + 'docname' : docname, + 'restriction_label': status and ('' % _('Restricted')) or '' + } + for version in versions: + if version['previous']: + versiontext = """
(%(see)s %(previous)s)""" % { + 'see' : _("see"), + 'siteurl' : CFG_SITE_URL, + 'CFG_SITE_RECORD': CFG_SITE_RECORD, + 'docname' : urllib.quote(docname), + 'recID': recid, + 'previous': _("previous"), + 'ln_link': (ln != CFG_SITE_LANG and '&ln=' + ln) or '', + } + else: + versiontext = "" + out += """ + + " + out += "" + return out + + def tmpl_bibdocfile_filelist(self, ln, recid, name, version, md, superformat, subformat, nice_size, description): + """ + Displays a file in the file list. + + Parameters: + + - 'ln' *string* - The language to display the interface in + + - 'recid' *int* - The id of the record + + - 'name' *string* - The name of the file + + - 'version' *string* - The version + + - 'md' *datetime* - the modification date + + - 'superformat' *string* - The display superformat + + - 'subformat' *string* - The display subformat + + - 'nice_size' *string* - The nice_size of the file + + - 'description' *string* - The description that might have been associated + to the particular file + """ + + # load the right message language + _ = gettext_set_language(ln) + + urlbase = '%s/%s/%s/files/%s' % ( + CFG_SITE_URL, + CFG_SITE_RECORD, + recid, + '%s%s' % (name, superformat)) + + urlargd = {'version' : version} + if subformat: + urlargd['subformat'] = subformat + + link_label = '%s%s' % (name, superformat) + if subformat: + link_label += ' (%s)' % subformat + + link = create_html_link(urlbase, urlargd, cgi.escape(link_label)) + + return """ + + %(link)s + + + [%(nice_size)s] + %(md)s + + %(description)s + """ % { + 'link' : link, + 'nice_size' : nice_size, + 'md' : convert_datestruct_to_dategui(md.timetuple(), ln), + 'description' : cgi.escape(description), + } + diff --git a/modules/bibdocfile/lib/bibdocfile_webinterface.py b/modules/bibdocfile/lib/bibdocfile_webinterface.py new file mode 100644 index 000000000..74a11bec4 --- /dev/null +++ b/modules/bibdocfile/lib/bibdocfile_webinterface.py @@ -0,0 +1,530 @@ +## This file is part of Invenio. 
+## Copyright (C) 2012 CERN. +## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +import os +import time +import shutil + +from invenio.config import \ + CFG_ACCESS_CONTROL_LEVEL_SITE, \ + CFG_SITE_LANG, \ + CFG_TMPSHAREDDIR, \ + CFG_SITE_URL, \ + CFG_SITE_SECURE_URL, \ + CFG_WEBSUBMIT_STORAGEDIR, \ + CFG_SITE_RECORD +from invenio.bibdocfile_config import CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES, \ + CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC, \ + CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS, \ + CFG_BIBDOCFILE_ICON_SUBFORMAT_RE +from invenio import webinterface_handler_config as apache +from invenio.access_control_config import VIEWRESTRCOLL +from invenio.access_control_mailcookie import mail_cookie_create_authorize_action +from invenio.access_control_engine import acc_authorize_action +from invenio.access_control_admin import acc_is_role +from invenio.webpage import page, pageheaderonly, \ + pagefooteronly +from invenio.webuser import getUid, page_not_authorized, collect_user_info, isUserSuperAdmin, \ + isGuestUser +from invenio import webjournal_utils +from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory +from invenio.urlutils import make_canonical_urlargd, redirect_to_url +from invenio.messages import gettext_set_language +from invenio.search_engine import \ + 
guess_primary_collection_of_a_record, get_colID, record_exists, \ + create_navtrail_links, check_user_can_view_record, record_empty, \ + is_user_owner_of_record, print_warning +from invenio.bibdocfile import BibRecDocs, normalize_format, file_strip_ext, \ + stream_restricted_icon, BibDoc, InvenioBibDocFileError, \ + get_subformat_from_format +from invenio.errorlib import register_exception +from invenio.websearchadminlib import get_detailed_page_tabs +import invenio.template +bibdocfile_templates = invenio.template.load('bibdocfile') +webstyle_templates = invenio.template.load('webstyle') +websubmit_templates = invenio.template.load('websubmit') +websearch_templates = invenio.template.load('websearch') + +from invenio.bibdocfile_managedocfiles import \ + create_file_upload_interface, \ + get_upload_file_interface_javascript, \ + get_upload_file_interface_css, \ + move_uploaded_files_to_storage + + +class WebInterfaceFilesPages(WebInterfaceDirectory): + + def __init__(self, recid): + self.recid = recid + + def _lookup(self, component, path): + # after ///files/ every part is used as the file + # name + filename = component + + def getfile(req, form): + args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd) + ln = args['ln'] + + _ = gettext_set_language(ln) + + uid = getUid(req) + user_info = collect_user_info(req) + + verbose = args['verbose'] + if verbose >= 1 and not isUserSuperAdmin(user_info): + # Only SuperUser can see all the details! + verbose = 0 + + if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1: + return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), + navmenuid='submit') + + if record_exists(self.recid) < 1: + msg = "

%s

" % _("Requested record does not seem to exist.") + return print_warning(req, msg) + + if record_empty(self.recid): + msg = "

%s

" % _("Requested record does not seem to have been integrated.") + return print_warning(req, msg) + + (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid) + if auth_code and user_info['email'] == 'guest': + if webjournal_utils.is_recid_in_released_issue(self.recid): + # We can serve the file + pass + else: + cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) + target = '/youraccount/login' + \ + make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ + CFG_SITE_SECURE_URL + user_info['uri']}, {}) + return redirect_to_url(req, target, norobot=True) + elif auth_code: + if webjournal_utils.is_recid_in_released_issue(self.recid): + # We can serve the file + pass + else: + return page_not_authorized(req, "../", \ + text = auth_message) + + + readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1 + + # From now on: either the user provided a specific file + # name (and a possible version), or we return a list of + # all the available files. In no case are the docids + # visible. + try: + bibarchive = BibRecDocs(self.recid) + except InvenioBibDocFileError, e: + register_exception(req=req, alert_admin=True) + msg = "

%s

%s

" % ( + _("The system has encountered an error in retrieving the list of files for this document."), + _("The error has been logged and will be taken in consideration as soon as possible.")) + return print_warning(req, msg) + + if bibarchive.deleted_p(): + return print_warning(req, _("Requested record does not seem to exist.")) + + docname = '' + format = '' + version = '' + warn = '' + + if filename: + # We know the complete file name, guess which docid it + # refers to + ## TODO: Change the extension system according to ext.py from setlink + ## and have a uniform extension mechanism... + docname = file_strip_ext(filename) + format = filename[len(docname):] + if format and format[0] != '.': + format = '.' + format + if args['subformat']: + format += ';%s' % args['subformat'] + else: + docname = args['docname'] + + if not format: + format = args['format'] + if args['subformat']: + format += ';%s' % args['subformat'] + + if not version: + version = args['version'] + + ## Download as attachment + is_download = False + if args['download']: + is_download = True + + # version could be either empty, or all or an integer + try: + int(version) + except ValueError: + if version != 'all': + version = '' + + display_hidden = isUserSuperAdmin(user_info) + + if version != 'all': + # search this filename in the complete list of files + for doc in bibarchive.list_bibdocs(): + if docname == doc.get_docname(): + try: + docfile = doc.get_file(format, version) + (auth_code, auth_message) = docfile.is_restricted(user_info) + if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid): + if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(get_subformat_from_format(format)): + return stream_restricted_icon(req) + if user_info['email'] == 'guest': + cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()}) + target = '/youraccount/login' + \ + make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ + CFG_SITE_SECURE_URL + 
user_info['uri']}, {}) + redirect_to_url(req, target) + else: + req.status = apache.HTTP_UNAUTHORIZED + warn += print_warning(req, _("This file is restricted: ") + auth_message) + break + + if not docfile.hidden_p(): + if not readonly: + ip = str(req.remote_ip) + res = doc.register_download(ip, version, format, uid) + try: + return docfile.stream(req, download=is_download) + except InvenioBibDocFileError, msg: + register_exception(req=req, alert_admin=True) + req.status = apache.HTTP_INTERNAL_SERVER_ERROR + return print_warning(req, _("An error has happened in trying to stream the request file.")) + else: + req.status = apache.HTTP_UNAUTHORIZED + warn = print_warning(req, _("The requested file is hidden and can not be accessed.")) + + except InvenioBibDocFileError, msg: + register_exception(req=req, alert_admin=True) + + if docname and format and not warn: + req.status = apache.HTTP_NOT_FOUND + warn += print_warning(req, _("Requested file does not seem to exist.")) + filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden) + + t = warn + bibdocfile_templates.tmpl_filelist( + ln=ln, + recid=self.recid, + docname=args['docname'], + version=version, + filelist=filelist) + + cc = guess_primary_collection_of_a_record(self.recid) + unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid, ln) + ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] + ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1])) + link_ln = '' + if ln != CFG_SITE_LANG: + link_ln = '?ln=%s' % ln + tabs = [(unordered_tabs[tab_id]['label'], \ + '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln), \ + tab_id == 'files', + unordered_tabs[tab_id]['enabled']) \ + for (tab_id, order) in ordered_tabs_id + if unordered_tabs[tab_id]['visible'] == True] + top = webstyle_templates.detailed_record_container_top(self.recid, + tabs, + args['ln']) + bottom = 
webstyle_templates.detailed_record_container_bottom(self.recid, + tabs, + args['ln']) + title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln']) + return pageheaderonly(title=title, + navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \ + ''' > %s + > %s''' % \ + (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")), + + description=description, + keywords=keywords, + uid=uid, + language=ln, + req=req, + navmenuid='search', + navtrail_append_title_p=0) + \ + websearch_templates.tmpl_search_pagestart(ln) + \ + top + t + bottom + \ + websearch_templates.tmpl_search_pageend(ln) + \ + pagefooteronly(language=ln, req=req) + return getfile, [] + + def __call__(self, req, form): + """Called in case of URLs like /CFG_SITE_RECORD/123/files without + trailing slash. + """ + args = wash_urlargd(form, bibdocfile_templates.files_default_urlargd) + ln = args['ln'] + link_ln = '' + if ln != CFG_SITE_LANG: + link_ln = '?ln=%s' % ln + + return redirect_to_url(req, '%s/%s/%s/files/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, link_ln)) + +def bibdocfile_legacy_getfile(req, form): + """ Handle legacy /getfile.py URLs """ + + args = wash_urlargd(form, { + 'recid': (int, 0), + 'docid': (int, 0), + 'version': (str, ''), + 'name': (str, ''), + 'format': (str, ''), + 'ln' : (str, CFG_SITE_LANG) + }) + + _ = gettext_set_language(args['ln']) + + def _getfile_py(req, recid=0, docid=0, version="", name="", format="", ln=CFG_SITE_LANG): + if not recid: + ## Let's obtain the recid from the docid + if docid: + try: + bibdoc = BibDoc(docid=docid) + recid = bibdoc.get_recid() + except InvenioBibDocFileError, e: + return print_warning(req, _("An error has happened in trying to retrieve the requested file.")) + else: + return print_warning(req, _('Not enough information to retrieve the document')) + else: + if not name and docid: + ## Let's obtain the name from the docid + try: + bibdoc = BibDoc(docid) + name = 
bibdoc.get_docname() + except InvenioBibDocFileError, e: + return print_warning(req, _("An error has happened in trying to retrieving the requested file.")) + + format = normalize_format(format) + + redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, format, ln, version and 'version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY) + + return _getfile_py(req, **args) + + +# -------------------------------------------------- + +class WebInterfaceManageDocFilesPages(WebInterfaceDirectory): + + _exports = ['', 'managedocfiles', 'managedocfilesasync'] + + def managedocfiles(self, req, form): + """ + Display admin interface to manage files of a record + """ + argd = wash_urlargd(form, { + 'ln': (str, ''), + 'access': (str, ''), + 'recid': (int, None), + 'do': (int, 0), + 'cancel': (str, None), + }) + + _ = gettext_set_language(argd['ln']) + uid = getUid(req) + user_info = collect_user_info(req) + # Check authorization + (auth_code, auth_msg) = acc_authorize_action(req, + 'runbibdocfile') + if auth_code and user_info['email'] == 'guest': + # Ask to login + target = '/youraccount/login' + \ + make_canonical_urlargd({'ln' : argd['ln'], + 'referer' : CFG_SITE_SECURE_URL + user_info['uri']}, {}) + return redirect_to_url(req, target) + elif auth_code: + return page_not_authorized(req, referer="/%s/managedocfiles" % CFG_SITE_RECORD, + uid=uid, text=auth_msg, + ln=argd['ln'], + navmenuid="admin") + + # Prepare navtrail + navtrail = '''Admin Area > %(manage_files)s''' \ + % {'CFG_SITE_URL': CFG_SITE_URL, + 'manage_files': _("Manage Document Files")} + + body = '' + if argd['do'] != 0 and not argd['cancel']: + # Apply modifications + working_dir = os.path.join(CFG_TMPSHAREDDIR, + 'websubmit_upload_interface_config_' + str(uid), + argd['access']) + move_uploaded_files_to_storage(working_dir=working_dir, + recid=argd['recid'], + icon_sizes=['180>','700>'], + create_icon_doctypes=['*'], + force_file_revision=False) + # Clean temporary 
directory + shutil.rmtree(working_dir) + + # Confirm modifications + body += '

%s

' % \ + (_('Your modifications to record #%i have been submitted') % argd['recid']) + elif argd['cancel']: + # Clean temporary directory + working_dir = os.path.join(CFG_TMPSHAREDDIR, + 'websubmit_upload_interface_config_' + str(uid), + argd['access']) + shutil.rmtree(working_dir) + body += '

%s

' % \ + (_('Your modifications to record #%i have been cancelled') % argd['recid']) + + if not argd['recid'] or argd['do'] != 0: + body += ''' +
+ + + +
+ ''' % {'edit': _('Edit'), + 'edit_record': _('Edit record'), + 'CFG_SITE_URL': CFG_SITE_URL, + 'CFG_SITE_RECORD': CFG_SITE_RECORD} + + access = time.strftime('%Y%m%d_%H%M%S') + if argd['recid'] and argd['do'] == 0: + # Displaying interface to manage files + # Prepare navtrail + title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, argd['recid'], + argd['ln']) + navtrail = '''Admin Area > + %(manage_files)s > + %(record)s: %(title)s + ''' \ + % {'CFG_SITE_URL': CFG_SITE_URL, + 'title': title, + 'manage_files': _("Document File Manager"), + 'record': _("Record #%i") % argd['recid'], + 'CFG_SITE_RECORD': CFG_SITE_RECORD} + + body += create_file_upload_interface(\ + recid=argd['recid'], + ln=argd['ln'], + uid=uid, + sbm_access=access, + display_hidden_files=True, + restrictions_and_desc=CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS, + doctypes_and_desc=CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES, + **CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC)[1] + + body += '''
+
+ + + + +
+ + +
''' % \ + {'apply_changes': _("Apply changes"), + 'cancel_changes': _("Cancel all changes"), + 'recid': argd['recid'], + 'access': access, + 'ln': argd['ln'], + 'CFG_SITE_URL': CFG_SITE_URL, + 'CFG_SITE_RECORD': CFG_SITE_RECORD} + + body += websubmit_templates.tmpl_page_do_not_leave_submission_js(argd['ln'], enabled=True) + + return page(title = _("Document File Manager") + (argd['recid'] and (': ' + _("Record #%i") % argd['recid']) or ''), + navtrail=navtrail, + navtrail_append_title_p=0, + metaheaderadd = get_upload_file_interface_javascript(form_url_params='?access='+access) + \ + get_upload_file_interface_css(), + body = body, + uid = uid, + language=argd['ln'], + req=req, + navmenuid='admin') + + def managedocfilesasync(self, req, form): + "Upload file and returns upload interface" + + argd = wash_urlargd(form, { + 'ln': (str, ''), + 'recid': (int, 1), + 'doctype': (str, ''), + 'access': (str, ''), + 'indir': (str, ''), + }) + + user_info = collect_user_info(req) + include_headers = False + # User submitted either through WebSubmit, or admin interface. + if form.has_key('doctype') and form.has_key('indir') \ + and form.has_key('access'): + # Submitted through WebSubmit. Check rights + include_headers = True + working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, + argd['indir'], argd['doctype'], + argd['access']) + try: + assert(working_dir == os.path.abspath(working_dir)) + except AssertionError: + raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) + try: + # Retrieve recid from working_dir, safer. + recid_fd = file(os.path.join(working_dir, 'SN')) + recid = int(recid_fd.read()) + recid_fd.close() + except: + recid = "" + try: + act_fd = file(os.path.join(working_dir, 'act')) + action = act_fd.read() + act_fd.close() + except: + action = "" + + # Is user authorized to perform this action? 
+ (auth_code, auth_msg) = acc_authorize_action(user_info, + "submit", + authorized_if_no_roles=not isGuestUser(getUid(req)), + doctype=argd['doctype'], + act=action) + if not acc_is_role("submit", doctype=argd['doctype'], act=action): + # There is NO authorization plugged. User should have access + auth_code = 0 + else: + # User must be allowed to attach files + (auth_code, auth_msg) = acc_authorize_action(user_info, + 'runbibdocfile') + recid = argd['recid'] + + if auth_code: + raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) + + return create_file_upload_interface(recid=recid, + ln=argd['ln'], + print_outside_form_tag=False, + print_envelope=False, + form=form, + include_headers=include_headers, + sbm_indir=argd['indir'], + sbm_access=argd['access'], + sbm_doctype=argd['doctype'], + uid=user_info['uid'])[1] + + __call__ = managedocfiles diff --git a/modules/websubmit/lib/bibdocfilecli.py b/modules/bibdocfile/lib/bibdocfilecli.py similarity index 99% rename from modules/websubmit/lib/bibdocfilecli.py rename to modules/bibdocfile/lib/bibdocfilecli.py index b32385c2a..391c6a68c 100644 --- a/modules/websubmit/lib/bibdocfilecli.py +++ b/modules/bibdocfile/lib/bibdocfilecli.py @@ -1,1190 +1,1190 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. -## Copyright (C) 2008, 2009, 2010, 2011 CERN. +## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibDocAdmin CLI administration tool """ __revision__ = "$Id$" import sys import re import os import time import fnmatch import time from datetime import datetime from logging import getLogger, debug, DEBUG from optparse import OptionParser, OptionGroup, OptionValueError from tempfile import mkstemp from invenio.errorlib import register_exception -from invenio.config import CFG_TMPDIR, CFG_SITE_URL, CFG_WEBSUBMIT_FILEDIR, \ +from invenio.config import CFG_TMPDIR, CFG_SITE_URL, CFG_BIBDOCFILE_FILEDIR, \ CFG_SITE_RECORD, CFG_TMPSHAREDDIR -from invenio.bibdocfile import BibRecDocs, BibDoc, InvenioWebSubmitFileError, \ +from invenio.bibdocfile import BibRecDocs, BibDoc, InvenioBibDocFileError, \ nice_size, check_valid_url, clean_url, get_docname_from_url, \ guess_format_from_url, KEEP_OLD_VALUE, decompose_bibdocfile_fullpath, \ bibdocfile_url_to_bibdoc, decompose_bibdocfile_url, CFG_BIBDOCFILE_AVAILABLE_FLAGS from invenio.intbitset import intbitset from invenio.search_engine import perform_request_search from invenio.textutils import wrap_text_in_a_box, wait_for_user from invenio.dbquery import run_sql from invenio.bibtask import task_low_level_submission from invenio.textutils import encode_for_xml from invenio.websubmit_file_converter import can_perform_ocr def _xml_mksubfield(key, subfield, fft): return fft.get(key, None) is not None and '\t\t%s\n' % (subfield, encode_for_xml(str(fft[key]))) or '' def _xml_mksubfields(key, subfield, fft): ret = "" for value in fft.get(key, []): ret += '\t\t%s\n' % (subfield, encode_for_xml(str(value))) return ret def _xml_fft_creator(fft): """Transform an fft dictionary (made by keys url, docname, format, new_docname, comment, description, restriction, doctype, into an xml string.""" debug('Input FFT structure: %s' % fft) out 
= '\t\n' out += _xml_mksubfield('url', 'a', fft) out += _xml_mksubfield('docname', 'n', fft) out += _xml_mksubfield('format', 'f', fft) out += _xml_mksubfield('new_docname', 'm', fft) out += _xml_mksubfield('doctype', 't', fft) out += _xml_mksubfield('description', 'd', fft) out += _xml_mksubfield('comment', 'z', fft) out += _xml_mksubfield('restriction', 'r', fft) out += _xml_mksubfields('options', 'o', fft) out += _xml_mksubfield('version', 'v', fft) out += '\t\n' debug('FFT created: %s' % out) return out def ffts_to_xml(ffts_dict): """Transform a dictionary: recid -> ffts where ffts is a list of fft dictionary into xml. """ debug('Input FFTs dictionary: %s' % ffts_dict) out = '' recids = ffts_dict.keys() recids.sort() for recid in recids: ffts = ffts_dict[recid] if ffts: out += '\n' out += '\t%i\n' % recid for fft in ffts: out += _xml_fft_creator(fft) out += '\n' debug('MARC to Upload: %s' % out) return out _shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])") def _parse_datetime(var): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" if not var: return None date = time.time() factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = _shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) return datetime.fromtimestamp(date + sign * factor * value) else: return datetime(*(time.strptime(var, "%Y-%m-%d %H:%M:%S")[0:6])) # The code above is Python 2.4 compatible. The following is the 2.5 # version. 
# return datetime.strptime(var, "%Y-%m-%d %H:%M:%S") def _parse_date_range(var): """Returns the two dates contained as a low,high tuple""" limits = var.split(",") if len(limits)==1: low = _parse_datetime(limits[0]) return low, None if len(limits)==2: low = _parse_datetime(limits[0]) high = _parse_datetime(limits[1]) return low, high return None, None def cli_quick_match_all_recids(options): """Return an quickly an approximate but (by excess) list of good recids.""" url = getattr(options, 'url', None) if url: return intbitset([decompose_bibdocfile_url(url)[0]]) path = getattr(options, 'path', None) if path: return intbitset([decompose_bibdocfile_fullpath(path)[0]]) collection = getattr(options, 'collection', None) pattern = getattr(options, 'pattern', None) recids = getattr(options, 'recids', None) md_rec = getattr(options, 'md_rec', None) cd_rec = getattr(options, 'cd_rec', None) tmp_date_query = [] tmp_date_params = [] if recids is None: debug('Initially considering all the recids') recids = intbitset(run_sql('SELECT id FROM bibrec')) if not recids: print >> sys.stderr, 'WARNING: No record in the database' if md_rec[0] is not None: tmp_date_query.append('modification_date>=%s') tmp_date_params.append(md_rec[0]) if md_rec[1] is not None: tmp_date_query.append('modification_date<=%s') tmp_date_params.append(md_rec[1]) if cd_rec[0] is not None: tmp_date_query.append('creation_date>=%s') tmp_date_params.append(cd_rec[0]) if cd_rec[1] is not None: tmp_date_query.append('creation_date<=%s') tmp_date_params.append(cd_rec[1]) if tmp_date_query: tmp_date_query = ' AND '.join(tmp_date_query) tmp_date_params = tuple(tmp_date_params) query = 'SELECT id FROM bibrec WHERE %s' % tmp_date_query debug('Query: %s, param: %s' % (query, tmp_date_params)) recids &= intbitset(run_sql(query % tmp_date_query, tmp_date_params)) debug('After applying dates we obtain recids: %s' % recids) if not recids: print >> sys.stderr, 'WARNING: Time constraints for records are too strict' if 
collection or pattern: recids &= intbitset(perform_request_search(cc=collection or '', p=pattern or '')) debug('After applyings pattern and collection we obtain recids: %s' % recids) debug('Quick recids: %s' % recids) return recids def cli_quick_match_all_docids(options, recids=None): """Return an quickly an approximate but (by excess) list of good docids.""" url = getattr(options, 'url', None) if url: return intbitset([bibdocfile_url_to_bibdoc(url).get_id()]) path = getattr(options, 'path', None) if path: return intbitset([decompose_bibdocfile_fullpath(path)[0]]) deleted_docs = getattr(options, 'deleted_docs', None) action_undelete = getattr(options, 'action', None) == 'undelete' docids = getattr(options, 'docids', None) md_doc = getattr(options, 'md_doc', None) cd_doc = getattr(options, 'cd_doc', None) if docids is None: debug('Initially considering all the docids') if recids is None: recids = cli_quick_match_all_recids(options) docids = intbitset() for id_bibrec, id_bibdoc in run_sql('SELECT id_bibrec, id_bibdoc FROM bibrec_bibdoc'): if id_bibrec in recids: docids.add(id_bibdoc) else: debug('Initially considering this docids: %s' % docids) tmp_query = [] tmp_params = [] if deleted_docs is None and action_undelete: deleted_docs = 'only' if deleted_docs == 'no': tmp_query.append('status<>"DELETED"') elif deleted_docs == 'only': tmp_query.append('status="DELETED"') if md_doc[0] is not None: tmp_query.append('modification_date>=%s') tmp_params.append(md_doc[0]) if md_doc[1] is not None: tmp_query.append('modification_date<=%s') tmp_params.append(md_doc[1]) if cd_doc[0] is not None: tmp_query.append('creation_date>=%s') tmp_params.append(cd_doc[0]) if cd_doc[1] is not None: tmp_query.append('creation_date<=%s') tmp_params.append(cd_doc[1]) if tmp_query: tmp_query = ' AND '.join(tmp_query) tmp_params = tuple(tmp_params) query = 'SELECT id FROM bibdoc WHERE %s' % tmp_query debug('Query: %s, param: %s' % (query, tmp_params)) docids &= intbitset(run_sql(query, 
tmp_params)) debug('After applying dates we obtain docids: %s' % docids) return docids def cli_slow_match_single_recid(options, recid, recids=None, docids=None): """Apply all the given queries in order to assert wethever a recid match or not. if with_docids is True, the recid is matched if it has at least one docid that is matched""" debug('cli_slow_match_single_recid checking: %s' % recid) deleted_docs = getattr(options, 'deleted_docs', None) deleted_recs = getattr(options, 'deleted_recs', None) empty_recs = getattr(options, 'empty_recs', None) docname = cli2docname(options) bibrecdocs = BibRecDocs(recid, deleted_too=(deleted_docs != 'no')) if bibrecdocs.deleted_p() and (deleted_recs == 'no'): return False elif not bibrecdocs.deleted_p() and (deleted_recs != 'only'): if docids: for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_id() in docids: break else: return False if docname: for other_docname in bibrecdocs.get_bibdoc_names(): if docname and fnmatch.fnmatchcase(other_docname, docname): break else: return False if bibrecdocs.empty_p() and (empty_recs != 'no'): return True elif not bibrecdocs.empty_p() and (empty_recs != 'only'): return True return False def cli_slow_match_single_docid(options, docid, recids=None, docids=None): """Apply all the given queries in order to assert wethever a recid match or not.""" debug('cli_slow_match_single_docid checking: %s' % docid) empty_docs = getattr(options, 'empty_docs', None) docname = cli2docname(options) if recids is None: recids = cli_quick_match_all_recids(options) bibdoc = BibDoc(docid) if docname and not fnmatch.fnmatchcase(bibdoc.get_docname(), docname): debug('docname %s does not match the pattern %s' % (repr(bibdoc.get_docname()), repr(docname))) return False elif bibdoc.get_recid() and bibdoc.get_recid() not in recids: debug('recid %s is not in pattern %s' % (repr(bibdoc.get_recid()), repr(recids))) return False elif empty_docs == 'no' and bibdoc.empty_p(): debug('bibdoc is empty') return False elif 
empty_docs == 'only' and not bibdoc.empty_p(): debug('bibdoc is not empty') return False else: return True def cli2recid(options, recids=None, docids=None): """Given the command line options return a recid.""" recids = list(cli_recids_iterator(options, recids=recids, docids=docids)) if len(recids) == 1: return recids[0] if recids: raise StandardError, "More than one recid has been matched: %s" % recids else: raise StandardError, "No recids matched" def cli2docid(options, recids=None, docids=None): """Given the command line options return a docid.""" docids = list(cli_docids_iterator(options, recids=recids, docids=docids)) if len(docids) == 1: return docids[0] if docids: raise StandardError, "More than one docid has been matched: %s" % docids else: raise StandardError, "No docids matched" def cli2flags(options): """ Transform a comma separated list of flags into a list of valid flags. """ flags = getattr(options, 'flags', None) if flags: flags = [flag.strip().upper() for flag in flags.split(',')] for flag in flags: if flag not in CFG_BIBDOCFILE_AVAILABLE_FLAGS: raise StandardError("%s is not among the valid flags: %s" % (flag, ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS))) return flags return [] def cli2description(options): """Return a good value for the description.""" description = getattr(options, 'set_description', None) if description is None: description = KEEP_OLD_VALUE return description def cli2restriction(options): """Return a good value for the restriction.""" restriction = getattr(options, 'set_restriction', None) if restriction is None: restriction = KEEP_OLD_VALUE return restriction def cli2comment(options): """Return a good value for the comment.""" comment = getattr(options, 'set_comment', None) if comment is None: comment = KEEP_OLD_VALUE return comment def cli2doctype(options): """Return a good value for the doctype.""" doctype = getattr(options, 'set_doctype', None) if not doctype: return 'Main' return doctype def cli2docname(options, docid=None, 
url=None): """Given the command line options and optional precalculated docid returns the corresponding docname.""" if docid: bibdoc = BibDoc(docid=docid) return bibdoc.get_docname() docname = getattr(options, 'docname', None) if docname is not None: return docname if url is not None: return get_docname_from_url(url) else: return None def cli2format(options, url=None): """Given the command line options returns the corresponding format.""" format = getattr(options, 'format', None) if format is not None: return format elif url is not None: ## FIXME: to deploy once conversion-tools branch is merged #return guess_format_from_url(url) return guess_format_from_url(url) else: raise OptionValueError("Not enough information to retrieve a valid format") def cli_recids_iterator(options, recids=None, docids=None): """Slow iterator over all the matched recids. if with_docids is True, the recid must be attached to at least a matched docid""" debug('cli_recids_iterator') if recids is None: recids = cli_quick_match_all_recids(options) debug('working on recids: %s, docids: %s' % (recids, docids)) for recid in recids: if cli_slow_match_single_recid(options, recid, recids, docids): yield recid raise StopIteration def cli_docids_iterator(options, recids=None, docids=None): """Slow iterator over all the matched docids.""" if recids is None: recids = cli_quick_match_all_recids(options) if docids is None: docids = cli_quick_match_all_docids(options, recids) for docid in docids: if cli_slow_match_single_docid(options, docid, recids, docids): yield docid raise StopIteration def cli_get_stats(dummy): """Print per every collection some stats""" def print_table(title, table): if table: print "=" * 20, title, "=" * 20 for row in table: print "\t".join(str(elem) for elem in row) for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"): print "-" * 79 print "Statistic for: %s " % collection reclist = intbitset(reclist) if reclist: sqlreclist = "(" + 
','.join(str(elem) for elem in reclist) + ')' print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql print_table("Mimetypes", run_sql("SELECT COUNT(mime) as c, mime FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY mime ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql print_table("Sizes", run_sql("SELECT SUM(filesize) AS c FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true" % sqlreclist)) # kwalitee: disable=sql class OptionParserSpecial(OptionParser): def format_help(self, *args, **kwargs): result = OptionParser.format_help(self, *args, **kwargs) if hasattr(self, 'trailing_text'): return "%s\n%s\n" % (result, self.trailing_text) else: return result def prepare_option_parser(): """Parse the command line options.""" def _ids_ranges_callback(option, opt, value, parser): """Callback for optparse to parse a set of ids ranges in the form nnn1-nnn2,mmm1-mmm2... returning the corresponding intbitset. """ try: debug('option: %s, opt: %s, value: %s, parser: %s' % (option, opt, value, parser)) debug('Parsing range: %s' % value) value = ranges2ids(value) setattr(parser.values, option.dest, value) except Exception, e: raise OptionValueError("It's impossible to parse the range '%s' for option %s: %s" % (value, opt, e)) def _date_range_callback(option, opt, value, parser): """Callback for optparse to parse a range of dates in the form [date1],[date2]. Both date1 and date2 could be optional. 
the date can be expressed absolutely ("%Y-%m-%d %H:%M:%S") or relatively (([-\+]{0,1})([\d]+)([dhms])) to the current time.""" try: value = _parse_date_range(value) setattr(parser.values, option.dest, value) except Exception, e: raise OptionValueError("It's impossible to parse the range '%s' for option %s: %s" % (value, opt, e)) parser = OptionParserSpecial(usage="usage: %prog [options]", #epilog="""With you select the range of record/docnames/single files to work on. Note that some actions e.g. delete, append, revise etc. works at the docname level, while others like --set-comment, --set-description, at single file level and other can be applied in an iterative way to many records in a single run. Note that specifing docid(2) takes precedence over recid(2) which in turns takes precedence over pattern/collection search.""", version=__revision__) parser.trailing_text = """ Examples: $ bibdocfile --append foo.tar.gz --recid=1 $ bibdocfile --revise http://foo.com?search=123 --with-docname='sam' --format=pdf --recid=3 --set-docname='pippo' # revise for record 3 # the document sam, renaming it to pippo. 
$ bibdocfile --delete --with-docname="*sam" --all # delete all documents # starting ending # with "sam" $ bibdocfile --undelete -c "Test Collection" # undelete documents for # the collection $ bibdocfile --get-info --recids=1-4,6-8 # obtain informations $ bibdocfile -r 1 --with-docname=foo --set-docname=bar # Rename a document $ bibdocfile -r 1 --set-restriction "firerole: deny until '2011-01-01' allow any" # set an embargo to all the documents attached to record 1 # (note the ^M or \\n before 'allow any') # See also $r subfield in <%(site)s/help/admin/bibupload-admin-guide#3.6> # and Firerole in <%(site)s/help/admin/webaccess-admin-guide#6> $ bibdocfile --append x.pdf --recid=1 --with-flags='PDF/A,OCRED' # append # to record 1 the file x.pdf specifying the PDF/A and OCRED flags """ % {'site': CFG_SITE_URL} query_options = OptionGroup(parser, 'Query options') query_options.add_option('-r', '--recids', action="callback", callback=_ids_ranges_callback, type='string', dest='recids', help='matches records by recids, e.g.: --recids=1-3,5-7') query_options.add_option('-d', '--docids', action="callback", callback=_ids_ranges_callback, type='string', dest='docids', help='matches documents by docids, e.g.: --docids=1-3,5-7') query_options.add_option('-a', '--all', action='store_true', dest='all', help='Select all the records') query_options.add_option("--with-deleted-recs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_recs", help="'Yes' to also match deleted records, 'no' to exclude them, 'only' to match only deleted ones", metavar="yes/no/only", default='no') query_options.add_option("--with-deleted-docs", choices=['yes', 'no', 'only'], type="choice", dest="deleted_docs", help="'Yes' to also match deleted documents, 'no' to exclude them, 'only' to match only deleted ones (e.g. 
for undeletion)", metavar="yes/no/only", default='no') query_options.add_option("--with-empty-recs", choices=['yes', 'no', 'only'], type="choice", dest="empty_recs", help="'Yes' to also match records without attached documents, 'no' to exclude them, 'only' to consider only such records (e.g. for statistics)", metavar="yes/no/only", default='no') query_options.add_option("--with-empty-docs", choices=['yes', 'no', 'only'], type="choice", dest="empty_docs", help="'Yes' to also match documents without attached files, 'no' to exclude them, 'only' to consider only such documents (e.g. for sanity checking)", metavar="yes/no/only", default='no') query_options.add_option("--with-record-modification-date", action="callback", callback=_date_range_callback, dest="md_rec", nargs=1, type="string", default=(None, None), help="matches records modified date1 and date2; dates can be expressed relatively, e.g.:\"-5m,2030-2-23 04:40\" # matches records modified since 5 minutes ago until the 2030...", metavar="date1,date2") query_options.add_option("--with-record-creation-date", action="callback", callback=_date_range_callback, dest="cd_rec", nargs=1, type="string", default=(None, None), help="matches records created between date1 and date2; dates can be expressed relatively", metavar="date1,date2") query_options.add_option("--with-document-modification-date", action="callback", callback=_date_range_callback, dest="md_doc", nargs=1, type="string", default=(None, None), help="matches documents modified between date1 and date2; dates can be expressed relatively", metavar="date1,date2") query_options.add_option("--with-document-creation-date", action="callback", callback=_date_range_callback, dest="cd_doc", nargs=1, type="string", default=(None, None), help="matches documents created between date1 and date2; dates can be expressed relatively", metavar="date1,date2") query_options.add_option("--url", dest="url", help='matches the document referred by the URL, e.g. 
"%s/%s/1/files/foobar.pdf?version=2"' % (CFG_SITE_URL, CFG_SITE_RECORD)) - query_options.add_option("--path", dest="path", help='matches the document referred by the internal filesystem path, e.g. %s/g0/1/foobar.pdf\\;1' % CFG_WEBSUBMIT_FILEDIR) + query_options.add_option("--path", dest="path", help='matches the document referred by the internal filesystem path, e.g. %s/g0/1/foobar.pdf\\;1' % CFG_BIBDOCFILE_FILEDIR) query_options.add_option("--with-docname", dest="docname", help='matches documents with the given docname (accept wildcards)') query_options.add_option("--with-doctype", dest="doctype", help='matches documents with the given doctype') query_options.add_option('-p', '--pattern', dest='pattern', help='matches records by pattern') query_options.add_option('-c', '--collection', dest='collection', help='matches records by collection') query_options.add_option('--force', dest='force', help='force an action even when it\'s not necessary e.g. textify on an already textified bibdoc.', action='store_true', default=False) parser.add_option_group(query_options) getting_information_options = OptionGroup(parser, 'Actions for getting information') getting_information_options.add_option('--get-info', dest='action', action='store_const', const='get-info', help='print all the informations about the matched record/documents') getting_information_options.add_option('--get-disk-usage', dest='action', action='store_const', const='get-disk-usage', help='print disk usage statistics of the matched documents') getting_information_options.add_option('--get-history', dest='action', action='store_const', const='get-history', help='print the matched documents history') getting_information_options.add_option('--get-stats', dest='action', action='store_const', const='get-stats', help='print some statistics of file properties grouped by collections') parser.add_option_group(getting_information_options) setting_information_options = OptionGroup(parser, 'Actions for setting information') 
setting_information_options.add_option('--set-doctype', dest='set_doctype', help='specify the new doctype', metavar='doctype') setting_information_options.add_option('--set-description', dest='set_description', help='specify a description', metavar='description') setting_information_options.add_option('--set-comment', dest='set_comment', help='specify a comment', metavar='comment') setting_information_options.add_option('--set-restriction', dest='set_restriction', help='specify a restriction tag', metavar='restriction') setting_information_options.add_option('--set-docname', dest='new_docname', help='specifies a new docname for renaming', metavar='docname') setting_information_options.add_option("--unset-comment", action="store_const", const='', dest="set_comment", help="remove any comment") setting_information_options.add_option("--unset-descriptions", action="store_const", const='', dest="set_description", help="remove any description") setting_information_options.add_option("--unset-restrictions", action="store_const", const='', dest="set_restriction", help="remove any restriction") setting_information_options.add_option("--hide", dest="action", action='store_const', const='hide', help="hides matched documents and revisions") setting_information_options.add_option("--unhide", dest="action", action='store_const', const='unhide', help="hides matched documents and revisions") parser.add_option_group(setting_information_options) revising_options = OptionGroup(parser, 'Action for revising content') revising_options.add_option("--append", dest='append_path', help='specify the URL/path of the file that will appended to the bibdoc (implies --with-empty-recs=yes)', metavar='PATH/URL') revising_options.add_option("--revise", dest='revise_path', help='specify the URL/path of the file that will revise the bibdoc', metavar='PATH/URL') revising_options.add_option("--revert", dest='action', action='store_const', const='revert', help='reverts a document to the specified 
version') revising_options.add_option("--delete", action='store_const', const='delete', dest='action', help='soft-delete the matched documents') revising_options.add_option("--hard-delete", action='store_const', const='hard-delete', dest='action', help='hard-delete the single matched document with a specific format and a specific revision (this operation is not revertible)') revising_options.add_option("--undelete", action='store_const', const='undelete', dest='action', help='undelete previously soft-deleted documents') revising_options.add_option("--purge", action='store_const', const='purge', dest='action', help='purge (i.e. hard-delete any format of any version prior to the latest version of) the matched documents') revising_options.add_option("--expunge", action='store_const', const='expunge', dest='action', help='expunge (i.e. hard-delete any version and formats of) the matched documents') revising_options.add_option("--with-version", dest="version", help="specifies the version(s) to be used with hide, unhide, e.g.: 1-2,3 or ALL. Specifies the version to be used with hard-delete and revert, e.g. 2") revising_options.add_option("--with-format", dest="format", help='to specify a format when appending/revising/deleting/reverting a document, e.g. "pdf"', metavar='FORMAT') revising_options.add_option("--with-hide-previous", dest='hide_previous', action='store_true', help='when revising, hides previous versions', default=False) revising_options.add_option("--with-flags", dest='flags', help='comma-separated optional list of flags used when appending/revising a document. 
Valid flags are: %s' % ', '.join(CFG_BIBDOCFILE_AVAILABLE_FLAGS), default=None) parser.add_option_group(revising_options) housekeeping_options = OptionGroup(parser, 'Actions for housekeeping') housekeeping_options.add_option("--check-md5", action='store_const', const='check-md5', dest='action', help='check md5 checksum validity of files') housekeeping_options.add_option("--check-format", action='store_const', const='check-format', dest='action', help='check if any format-related inconsistences exists') housekeeping_options.add_option("--check-duplicate-docnames", action='store_const', const='check-duplicate-docnames', dest='action', help='check for duplicate docnames associated with the same record') housekeeping_options.add_option("--update-md5", action='store_const', const='update-md5', dest='action', help='update md5 checksum of files') housekeeping_options.add_option("--fix-all", action='store_const', const='fix-all', dest='action', help='fix inconsistences in filesystem vs database vs MARC') housekeeping_options.add_option("--fix-marc", action='store_const', const='fix-marc', dest='action', help='synchronize MARC after filesystem/database') housekeeping_options.add_option("--fix-format", action='store_const', const='fix-format', dest='action', help='fix format related inconsistences') housekeeping_options.add_option("--fix-duplicate-docnames", action='store_const', const='fix-duplicate-docnames', dest='action', help='fix duplicate docnames associated with the same record') housekeeping_options.add_option("--fix-bibdocfsinfo-cache", action='store_const', const='fix-bibdocfsinfo-cache', dest='action', help='fix bibdocfsinfo cache related inconsistences') parser.add_option_group(housekeeping_options) experimental_options = OptionGroup(parser, 'Experimental options (do not expect to find them in the next release)') experimental_options.add_option('--textify', dest='action', action='store_const', const='textify', help='extract text from matched documents and store 
it for later indexing') experimental_options.add_option('--with-ocr', dest='perform_ocr', action='store_true', default=False, help='when used with --textify, wether to perform OCR') parser.add_option_group(experimental_options) parser.add_option('-D', '--debug', action='store_true', dest='debug', default=False) parser.add_option('-H', '--human-readable', dest='human_readable', action='store_true', default=False, help='print sizes in human readable format (e.g., 1KB 234MB 2GB)') parser.add_option('--yes-i-know', action='store_true', dest='yes-i-know', help='use with care!') return parser def print_info(recid, docid, info): """Nicely print info about a recid, docid pair.""" print '%i:%i:%s' % (recid, docid, info) def bibupload_ffts(ffts, append=False, debug=False, interactive=True): """Given an ffts dictionary it creates the xml and submit it.""" xml = ffts_to_xml(ffts) if xml: if interactive: print xml tmp_file_fd, tmp_file_name = mkstemp(suffix='.xml', prefix="bibdocfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"), dir=CFG_TMPSHAREDDIR) os.write(tmp_file_fd, xml) os.close(tmp_file_fd) os.chmod(tmp_file_name, 0644) if append: if interactive: wait_for_user("This will be appended via BibUpload") if debug: task = task_low_level_submission('bibupload', 'bibdocfile', '-a', tmp_file_name, '-N', 'FFT', '-S2', '-v9') else: task = task_low_level_submission('bibupload', 'bibdocfile', '-a', tmp_file_name, '-N', 'FFT', '-S2') if interactive: print "BibUpload append submitted with id %s" % task else: if interactive: wait_for_user("This will be corrected via BibUpload") if debug: task = task_low_level_submission('bibupload', 'bibdocfile', '-c', tmp_file_name, '-N', 'FFT', '-S2', '-v9') else: task = task_low_level_submission('bibupload', 'bibdocfile', '-c', tmp_file_name, '-N', 'FFT', '-S2') if interactive: print "BibUpload correct submitted with id %s" % task elif interactive: print >> sys.stderr, "WARNING: no MARC to upload." 
return True def ranges2ids(parse_string): """Parse a string and return the intbitset of the corresponding ids.""" ids = intbitset() ranges = parse_string.split(",") for arange in ranges: tmp_ids = arange.split("-") if len(tmp_ids)==1: ids.add(int(tmp_ids[0])) else: if int(tmp_ids[0]) > int(tmp_ids[1]): # sanity check tmp = tmp_ids[0] tmp_ids[0] = tmp_ids[1] tmp_ids[1] = tmp ids += xrange(int(tmp_ids[0]), int(tmp_ids[1]) + 1) return ids def cli_append(options, append_path): """Create a bibupload FFT task submission for appending a format.""" recid = cli2recid(options) comment = cli2comment(options) description = cli2description(options) restriction = cli2restriction(options) doctype = cli2doctype(options) docname = cli2docname(options, url=append_path) flags = cli2flags(options) if not docname: raise OptionValueError, 'Not enough information to retrieve a valid docname' format = cli2format(options, append_path) url = clean_url(append_path) check_valid_url(url) bibrecdocs = BibRecDocs(recid) if bibrecdocs.has_docname_p(docname) and bibrecdocs.get_bibdoc(docname).format_already_exists_p(format): new_docname = bibrecdocs.propose_unique_docname(docname) wait_for_user("WARNING: a document with name %s and format %s already exists for recid %s. A new document with name %s will be created instead." 
% (repr(docname), repr(format), repr(recid), repr(new_docname))) docname = new_docname ffts = {recid: [{ 'docname' : docname, 'comment' : comment, 'description' : description, 'restriction' : restriction, 'doctype' : doctype, 'format' : format, 'url' : url, 'options': flags }]} return bibupload_ffts(ffts, append=True) def cli_revise(options, revise_path): """Create a bibupload FFT task submission for appending a format.""" recid = cli2recid(options) comment = cli2comment(options) description = cli2description(options) restriction = cli2restriction(options) docname = cli2docname(options, url=revise_path) hide_previous = getattr(options, 'hide_previous', None) flags = cli2flags(options) if hide_previous and 'PERFORM_HIDE_PREVIOUS' not in flags: flags.append('PERFORM_HIDE_PREVIOUS') if not docname: raise OptionValueError, 'Not enough information to retrieve a valid docname' format = cli2format(options, revise_path) doctype = cli2doctype(options) url = clean_url(revise_path) new_docname = getattr(options, 'new_docname', None) check_valid_url(url) ffts = {recid : [{ 'docname' : docname, 'new_docname' : new_docname, 'comment' : comment, 'description' : description, 'restriction' : restriction, 'doctype' : doctype, 'format' : format, 'url' : url, 'options' : flags }]} return bibupload_ffts(ffts) def cli_set_batch(options): """Change in batch the doctype, description, comment and restriction.""" ffts = {} doctype = getattr(options, 'set_doctype', None) description = cli2description(options) comment = cli2comment(options) restriction = cli2restriction(options) with_format = getattr(options, 'format', None) for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() fft = [] if description is not None or comment is not None: for bibdocfile in bibdoc.list_latest_files(): format = bibdocfile.get_format() if not with_format or with_format == format: fft.append({ 'docname': docname, 'restriction': restriction, 
'comment': comment, 'description': description, 'format': format, 'doctype': doctype }) else: fft.append({ 'docname': docname, 'restriction': restriction, 'doctype': doctype, }) ffts[recid] = fft return bibupload_ffts(ffts, append=False) def cli_textify(options): """Extract text to let indexing on fulltext be possible.""" force = getattr(options, 'force', None) perform_ocr = getattr(options, 'perform_ocr', None) if perform_ocr: if not can_perform_ocr(): print >> sys.stderr, "WARNING: OCR requested but OCR is not possible" perform_ocr = False if perform_ocr: additional = ' using OCR (this might take some time)' else: additional = '' for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) print 'Extracting text for docid %s%s...' % (docid, additional), sys.stdout.flush() if force or not bibdoc.has_text(require_up_to_date=True): try: bibdoc.extract_text(perform_ocr=perform_ocr) print "DONE" - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: print >> sys.stderr, "WARNING: %s" % e else: print "not needed" def cli_rename(options): """Rename a docname within a recid.""" new_docname = getattr(options, 'new_docname', None) docid = cli2docid(options) bibdoc = BibDoc(docid) docname = bibdoc.get_docname() recid = bibdoc.get_recid() ffts = {recid : [{'docname' : docname, 'new_docname' : new_docname}]} return bibupload_ffts(ffts, append=False) def cli_fix_bibdocfsinfo_cache(options): """Rebuild the bibdocfsinfo table according to what is available on filesystem""" to_be_fixed = intbitset() for docid in intbitset(run_sql("SELECT id FROM bibdoc")): print "Fixing bibdocfsinfo table for docid %s..." 
% docid, sys.stdout.flush() try: bibdoc = BibDoc(docid) - except InvenioWebSubmitFileError, err: + except InvenioBibDocFileError, err: print err continue try: bibdoc._sync_to_db() except Exception, err: recid = bibdoc.recid if recid: to_be_fixed.add(recid) print "ERROR: %s, scheduling a fix for recid %s" % (err, recid) print "DONE" if to_be_fixed: cli_fix_format(options, recids=to_be_fixed) print "You can now add CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE=1 to your invenio-local.conf file." def cli_fix_all(options): """Fix all the records of a recid_set.""" ffts = {} for recid in cli_recids_iterator(options): ffts[recid] = [] for docname in BibRecDocs(recid).get_bibdoc_names(): ffts[recid].append({'docname' : docname, 'doctype' : 'FIX-ALL'}) return bibupload_ffts(ffts, append=False) def cli_fix_marc(options, explicit_recid_set=None, interactive=True): """Fix all the records of a recid_set.""" ffts = {} if explicit_recid_set is not None: for recid in explicit_recid_set: ffts[recid] = [{'doctype' : 'FIX-MARC'}] else: for recid in cli_recids_iterator(options): ffts[recid] = [{'doctype' : 'FIX-MARC'}] return bibupload_ffts(ffts, append=False, interactive=interactive) def cli_check_format(options): """Check if any format-related inconsistences exists.""" count = 0 tot = 0 duplicate = False for recid in cli_recids_iterator(options): tot += 1 bibrecdocs = BibRecDocs(recid) if not bibrecdocs.check_duplicate_docnames(): print >> sys.stderr, "recid %s has duplicate docnames!" broken = True duplicate = True else: broken = False for docname in bibrecdocs.get_bibdoc_names(): if not bibrecdocs.check_format(docname): print >> sys.stderr, "recid %s with docname %s need format fixing" % (recid, docname) broken = True if broken: count += 1 if count: result = "%d out of %d records need their formats to be fixed." % (count, tot) else: result = "All records appear to be correct with respect to formats." 
if duplicate: result += " Note however that at least one record appear to have duplicate docnames. You should better fix this situation by using --fix-duplicate-docnames." print wrap_text_in_a_box(result, style="conclusion") return not(duplicate or count) def cli_check_duplicate_docnames(options): """Check if some record is connected with bibdoc having the same docnames.""" count = 0 tot = 0 for recid in cli_recids_iterator(options): tot += 1 bibrecdocs = BibRecDocs(recid) if bibrecdocs.check_duplicate_docnames(): count += 1 print >> sys.stderr, "recid %s has duplicate docnames!" if count: print "%d out of %d records have duplicate docnames." % (count, tot) return False else: print "All records appear to be correct with respect to duplicate docnames." return True def cli_fix_format(options, recids=None): """Fix format-related inconsistences.""" fixed = intbitset() tot = 0 if not recids: recids = cli_recids_iterator(options) for recid in recids: tot += 1 bibrecdocs = BibRecDocs(recid) for docname in bibrecdocs.get_bibdoc_names(): if not bibrecdocs.check_format(docname): if bibrecdocs.fix_format(docname, skip_check=True): print >> sys.stderr, "%i has been fixed for docname %s" % (recid, docname) else: print >> sys.stderr, "%i has been fixed for docname %s. However note that a new bibdoc might have been created." % (recid, docname) fixed.add(recid) if fixed: print "Now we need to synchronize MARC to reflect current changes." cli_fix_marc(options, explicit_recid_set=fixed) print wrap_text_in_a_box("%i out of %i record needed to be fixed." % (tot, len(fixed)), style="conclusion") return not fixed def cli_fix_duplicate_docnames(options): """Fix duplicate docnames.""" fixed = intbitset() tot = 0 for recid in cli_recids_iterator(options): tot += 1 bibrecdocs = BibRecDocs(recid) if not bibrecdocs.check_duplicate_docnames(): bibrecdocs.fix_duplicate_docnames(skip_check=True) print >> sys.stderr, "%i has been fixed for duplicate docnames." 
% recid fixed.add(recid) if fixed: print "Now we need to synchronize MARC to reflect current changes." cli_fix_marc(options, explicit_recid_set=fixed) print wrap_text_in_a_box("%i out of %i record needed to be fixed." % (tot, len(fixed)), style="conclusion") return not fixed def cli_delete(options): """Delete the given docid_set.""" ffts = {} for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) docname = bibdoc.get_docname() recid = bibdoc.get_recid() if recid not in ffts: ffts[recid] = [{'docname' : docname, 'doctype' : 'DELETE'}] else: ffts[recid].append({'docname' : docname, 'doctype' : 'DELETE'}) return bibupload_ffts(ffts) def cli_delete_file(options): """Delete the given file irreversibely.""" docid = cli2docid(options) recid = cli2recid(options, docids=intbitset([docid])) format = cli2format(options) docname = BibDoc(docid).get_docname() version = getattr(options, 'version', None) try: version_int = int(version) if 0 >= version_int: raise ValueError except: raise OptionValueError, 'when hard-deleting, version should be valid positive integer, not %s' % version ffts = {recid : [{'docname' : docname, 'version' : version, 'format' : format, 'doctype' : 'DELETE-FILE'}]} return bibupload_ffts(ffts) def cli_revert(options): """Revert a bibdoc to a given version.""" docid = cli2docid(options) recid = cli2recid(options, docids=intbitset([docid])) docname = BibDoc(docid).get_docname() version = getattr(options, 'version', None) try: version_int = int(version) if 0 >= version_int: raise ValueError except: raise OptionValueError, 'when reverting, version should be valid positive integer, not %s' % version ffts = {recid : [{'docname' : docname, 'version' : version, 'doctype' : 'REVERT'}]} return bibupload_ffts(ffts) def cli_undelete(options): """Delete the given docname""" docname = cli2docname(options) restriction = getattr(options, 'restriction', None) count = 0 if not docname: docname = 'DELETED-*-*' if not docname.startswith('DELETED-'): docname = 
'DELETED-*-' + docname to_be_undeleted = intbitset() fix_marc = intbitset() setattr(options, 'deleted_docs', 'only') for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) if bibdoc.get_status() == 'DELETED' and fnmatch.fnmatch(bibdoc.get_docname(), docname): to_be_undeleted.add(docid) fix_marc.add(bibdoc.get_recid()) count += 1 print '%s (docid %s from recid %s) will be undeleted to restriction: %s' % (bibdoc.get_docname(), docid, bibdoc.get_recid(), restriction) wait_for_user("I'll proceed with the undeletion") for docid in to_be_undeleted: bibdoc = BibDoc(docid) bibdoc.undelete(restriction) cli_fix_marc(options, explicit_recid_set=fix_marc) print wrap_text_in_a_box("%s bibdoc successfully undeleted with status '%s'" % (count, restriction), style="conclusion") def cli_get_info(options): """Print all the info of the matched docids or recids.""" debug('Getting info!') human_readable = bool(getattr(options, 'human_readable', None)) debug('human_readable: %s' % human_readable) deleted_docs = getattr(options, 'deleted_docs', None) in ('yes', 'only') debug('deleted_docs: %s' % deleted_docs) if getattr(options, 'docids', None): for docid in cli_docids_iterator(options): sys.stdout.write(str(BibDoc(docid, human_readable=human_readable))) else: for recid in cli_recids_iterator(options): sys.stdout.write(str(BibRecDocs(recid, deleted_too=deleted_docs, human_readable=human_readable))) def cli_purge(options): """Purge the matched docids.""" ffts = {} for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() if recid: if recid not in ffts: ffts[recid] = [] ffts[recid].append({ 'docname' : docname, 'doctype' : 'PURGE', }) return bibupload_ffts(ffts) def cli_expunge(options): """Expunge the matched docids.""" ffts = {} for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() if recid: if recid not in ffts: ffts[recid] = [] 
ffts[recid].append({ 'docname' : docname, 'doctype' : 'EXPUNGE', }) return bibupload_ffts(ffts) def cli_get_history(options): """Print the history of a docid_set.""" for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) history = bibdoc.get_history() for row in history: print_info(bibdoc.get_recid(), docid, row) def cli_get_disk_usage(options): """Print the space usage of a docid_set.""" human_readable = getattr(options, 'human_readable', None) total_size = 0 total_latest_size = 0 for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) size = bibdoc.get_total_size() total_size += size latest_size = bibdoc.get_total_size_latest_version() total_latest_size += latest_size if human_readable: print_info(bibdoc.get_recid(), docid, 'size=%s' % nice_size(size)) print_info(bibdoc.get_recid(), docid, 'latest version size=%s' % nice_size(latest_size)) else: print_info(bibdoc.get_recid(), docid, 'size=%s' % size) print_info(bibdoc.get_recid(), docid, 'latest version size=%s' % latest_size) if human_readable: print wrap_text_in_a_box('total size: %s\n\nlatest version total size: %s' % (nice_size(total_size), nice_size(total_latest_size)), style='conclusion') else: print wrap_text_in_a_box('total size: %s\n\nlatest version total size: %s' % (total_size, total_latest_size), style='conclusion') def cli_check_md5(options): """Check the md5 sums of a docid_set.""" failures = 0 for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) if bibdoc.md5s.check(): print_info(bibdoc.get_recid(), docid, 'checksum OK') else: for afile in bibdoc.list_all_files(): if not afile.check(): failures += 1 print_info(bibdoc.get_recid(), docid, '%s failing checksum!' 
% afile.get_full_path()) if failures: print wrap_text_in_a_box('%i files failing' % failures , style='conclusion') else: print wrap_text_in_a_box('All files are correct', style='conclusion') def cli_update_md5(options): """Update the md5 sums of a docid_set.""" for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) if bibdoc.md5s.check(): print_info(bibdoc.get_recid(), docid, 'checksum OK') else: for afile in bibdoc.list_all_files(): if not afile.check(): print_info(bibdoc.get_recid(), docid, '%s failing checksum!' % afile.get_full_path()) wait_for_user('Updating the md5s of this document can hide real problems.') bibdoc.md5s.update(only_new=False) def cli_hide(options): """Hide the matched versions of documents.""" documents_to_be_hidden = {} to_be_fixed = intbitset() versions = getattr(options, 'versions', 'all') if versions != 'all': try: versions = ranges2ids(versions) except: raise OptionValueError, 'You should specify correct versions. Not %s' % versions else: versions = intbitset(trailing_bits=True) for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) recid = bibdoc.get_recid() if recid: for bibdocfile in bibdoc.list_all_files(): this_version = bibdocfile.get_version() this_format = bibdocfile.get_format() if this_version in versions: if docid not in documents_to_be_hidden: documents_to_be_hidden[docid] = [] documents_to_be_hidden[docid].append((this_version, this_format)) to_be_fixed.add(recid) print '%s (docid: %s, recid: %s) will be hidden' % (bibdocfile.get_full_name(), docid, recid) wait_for_user('Proceeding to hide the matched documents...') for docid, documents in documents_to_be_hidden.iteritems(): bibdoc = BibDoc(docid) for version, format in documents: bibdoc.set_flag('HIDDEN', format, version) return cli_fix_marc(options, to_be_fixed) def cli_unhide(options): """Unhide the matched versions of documents.""" documents_to_be_unhidden = {} to_be_fixed = intbitset() versions = getattr(options, 'versions', 'all') if versions != 
'all': try: versions = ranges2ids(versions) except: raise OptionValueError, 'You should specify correct versions. Not %s' % versions else: versions = intbitset(trailing_bits=True) for docid in cli_docids_iterator(options): bibdoc = BibDoc(docid) recid = bibdoc.get_recid() if recid: for bibdocfile in bibdoc.list_all_files(): this_version = bibdocfile.get_version() this_format = bibdocfile.get_format() if this_version in versions: if docid not in documents_to_be_unhidden: documents_to_be_unhidden[docid] = [] documents_to_be_unhidden[docid].append((this_version, this_format)) to_be_fixed.add(recid) print '%s (docid: %s, recid: %s) will be unhidden' % (bibdocfile.get_full_name(), docid, recid) wait_for_user('Proceeding to unhide the matched documents...') for docid, documents in documents_to_be_unhidden.iteritems(): bibdoc = BibDoc(docid) for version, format in documents: bibdoc.unset_flag('HIDDEN', format, version) return cli_fix_marc(options, to_be_fixed) def main(): parser = prepare_option_parser() (options, args) = parser.parse_args() if getattr(options, 'debug', None): getLogger().setLevel(DEBUG) debug('test') debug('options: %s, args: %s' % (options, args)) try: if not getattr(options, 'action', None) and \ not getattr(options, 'append_path', None) and \ not getattr(options, 'revise_path', None): if getattr(options, 'set_doctype', None) is not None or \ getattr(options, 'set_comment', None) is not None or \ getattr(options, 'set_description', None) is not None or \ getattr(options, 'set_restriction', None) is not None: cli_set_batch(options) elif getattr(options, 'new_docname', None): cli_rename(options) else: print >> sys.stderr, "ERROR: no action specified" sys.exit(1) elif getattr(options, 'append_path', None): options.empty_recs = 'yes' options.empty_docs = 'yes' cli_append(options, getattr(options, 'append_path', None)) elif getattr(options, 'revise_path', None): cli_revise(options, getattr(options, 'revise_path', None)) elif options.action == 'textify': 
cli_textify(options) elif getattr(options, 'action', None) == 'get-history': cli_get_history(options) elif getattr(options, 'action', None) == 'get-info': cli_get_info(options) elif getattr(options, 'action', None) == 'get-disk-usage': cli_get_disk_usage(options) elif getattr(options, 'action', None) == 'check-md5': cli_check_md5(options) elif getattr(options, 'action', None) == 'update-md5': cli_update_md5(options) elif getattr(options, 'action', None) == 'fix-all': cli_fix_all(options) elif getattr(options, 'action', None) == 'fix-marc': cli_fix_marc(options) elif getattr(options, 'action', None) == 'delete': cli_delete(options) elif getattr(options, 'action', None) == 'hard-delete': cli_delete_file(options) elif getattr(options, 'action', None) == 'fix-duplicate-docnames': cli_fix_duplicate_docnames(options) elif getattr(options, 'action', None) == 'fix-format': cli_fix_format(options) elif getattr(options, 'action', None) == 'check-duplicate-docnames': cli_check_duplicate_docnames(options) elif getattr(options, 'action', None) == 'check-format': cli_check_format(options) elif getattr(options, 'action', None) == 'undelete': cli_undelete(options) elif getattr(options, 'action', None) == 'purge': cli_purge(options) elif getattr(options, 'action', None) == 'expunge': cli_expunge(options) elif getattr(options, 'action', None) == 'revert': cli_revert(options) elif getattr(options, 'action', None) == 'hide': cli_hide(options) elif getattr(options, 'action', None) == 'unhide': cli_unhide(options) elif getattr(options, 'action', None) == 'fix-bibdocfsinfo-cache': options.empty_docs = 'yes' cli_fix_bibdocfsinfo_cache(options) elif getattr(options, 'action', None) == 'get-stats': cli_get_stats(options) else: print >> sys.stderr, "ERROR: Action %s is not valid" % getattr(options, 'action', None) sys.exit(1) except Exception, e: register_exception() print >> sys.stderr, 'ERROR: %s' % e sys.exit(1) if __name__ == '__main__': main() diff --git a/modules/websubmit/lib/file.py 
b/modules/bibdocfile/lib/file.py similarity index 100% rename from modules/websubmit/lib/file.py rename to modules/bibdocfile/lib/file.py diff --git a/modules/websubmit/lib/fulltext_files_migration_kit.py b/modules/bibdocfile/lib/fulltext_files_migration_kit.py similarity index 98% rename from modules/websubmit/lib/fulltext_files_migration_kit.py rename to modules/bibdocfile/lib/fulltext_files_migration_kit.py index 99ccb105c..7a801da33 100644 --- a/modules/websubmit/lib/fulltext_files_migration_kit.py +++ b/modules/bibdocfile/lib/fulltext_files_migration_kit.py @@ -1,142 +1,142 @@ ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = "$Id$" """This script updates the filesystem structure of fulltext files in order to make it coherent with bibdocfile implementation (bibdocfile.py structure is backward compatible with file.py structure, but the viceversa is not true). 
""" import sys from invenio.intbitset import intbitset from invenio.textutils import wrap_text_in_a_box from invenio.config import CFG_LOGDIR, CFG_SITE_SUPPORT_EMAIL from invenio.dbquery import run_sql, OperationalError -from invenio.bibdocfile import BibRecDocs, InvenioWebSubmitFileError +from invenio.bibdocfile import BibRecDocs, InvenioBibDocFileError from datetime import datetime def retrieve_fulltext_recids(): """Returns the list of all the recid number linked with at least a fulltext file.""" res = run_sql('SELECT DISTINCT id_bibrec FROM bibrec_bibdoc') return intbitset(res) def fix_recid(recid, logfile): """Fix a given recid.""" print "Upgrading record %s ->" % recid, print >> logfile, "Upgrading record %s:" % recid bibrec = BibRecDocs(recid) print >> logfile, bibrec docnames = bibrec.get_bibdoc_names() try: for docname in docnames: print docname, new_bibdocs = bibrec.fix(docname) new_bibdocnames = [bibdoc.get_docname() for bibdoc in new_bibdocs] if new_bibdocnames: print "(created bibdocs: '%s')" % "', '".join(new_bibdocnames), print >> logfile, "(created bibdocs: '%s')" % "', '".join(new_bibdocnames) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: print >> logfile, BibRecDocs(recid) print "%s -> ERROR", e return False else: print >> logfile, BibRecDocs(recid) print "-> OK" return True def backup_tables(drop=False): """This function create a backup of bibrec_bibdoc, bibdoc and bibdoc_bibdoc tables. 
Returns False in case dropping of previous table is needed.""" if drop: run_sql('DROP TABLE bibrec_bibdoc_backup') run_sql('DROP TABLE bibdoc_backup') run_sql('DROP TABLE bibdoc_bibdoc_backup') try: run_sql("""CREATE TABLE bibrec_bibdoc_backup (KEY id_bibrec(id_bibrec), KEY id_bibdoc(id_bibdoc)) SELECT * FROM bibrec_bibdoc""") run_sql("""CREATE TABLE bibdoc_backup (PRIMARY KEY id(id)) SELECT * FROM bibdoc""") run_sql("""CREATE TABLE bibdoc_bibdoc_backup (KEY id_bibdoc1(id_bibdoc1), KEY id_bibdoc2(id_bibdoc2)) SELECT * FROM bibdoc_bibdoc""") except OperationalError, e: if not drop: return False raise return True def check_yes(): """Return True if the user types 'yes'.""" try: return raw_input().strip() == 'yes' except KeyboardInterrupt: return False def main(): """Core loop.""" logfilename = '%s/fulltext_files_migration_kit-%s.log' % (CFG_LOGDIR, datetime.today().strftime('%Y%m%d%H%M%S')) try: logfile = open(logfilename, 'w') except IOError, e: print wrap_text_in_a_box('NOTE: it\'s impossible to create the log:\n\n %s\n\nbecause of:\n\n %s\n\nPlease run this migration kit as the same user who runs Invenio (e.g. Apache)' % (logfilename, e), style='conclusion', break_long=False) sys.exit(1) recids = retrieve_fulltext_recids() print wrap_text_in_a_box ("""This script migrate the filesystem structure used to store fulltext files to the new stricter structure. This script must not be run during normal Invenio operations. It is safe to run this script. No file will be deleted. Anyway it is recommended to run a backup of the filesystem structure just in case. A backup of the database tables involved will be automatically performed.""", style='important') print "%s records will be migrated/fixed." % len(recids) print "Please type yes if you want to go further:", if not check_yes(): print "INTERRUPTED" sys.exit(1) print "Backing up database tables" try: if not backup_tables(): print wrap_text_in_a_box("""It appears that is not the first time that you run this script. 
Backup tables have been already created by a previous run. In order for the script to go further they need to be removed.""", style='important') print "Please, type yes if you agree to remove them and go further:", if not check_yes(): print wrap_text_in_a_box("INTERRUPTED", style='conclusion') sys.exit(1) print "Backing up database tables (after dropping previous backup)", backup_tables(drop=True) print "-> OK" else: print "-> OK" except Exception, e: print wrap_text_in_a_box("Unexpected error while backing up tables. Please, do your checks: %s" % e, style='conclusion') sys.exit(1) print "Created a complete log file into %s" % logfilename for recid in recids: if not fix_recid(recid, logfile): logfile.close() print wrap_text_in_a_box(title="INTERRUPTED BECAUSE OF ERROR!", body="""Please see the log file %s for what was the status of record %s prior to the error. Contact %s in case of problems, attaching the log.""" % (logfilename, recid, CFG_SITE_SUPPORT_EMAIL), style='conclusion') sys.exit(1) print wrap_text_in_a_box("DONE", style='conclusion') if __name__ == '__main__': main() diff --git a/modules/websubmit/lib/icon_migration_kit.py b/modules/bibdocfile/lib/icon_migration_kit.py similarity index 100% rename from modules/websubmit/lib/icon_migration_kit.py rename to modules/bibdocfile/lib/icon_migration_kit.py diff --git a/modules/bibedit/lib/bibedit_engine.py b/modules/bibedit/lib/bibedit_engine.py index edc610918..78005369a 100644 --- a/modules/bibedit/lib/bibedit_engine.py +++ b/modules/bibedit/lib/bibedit_engine.py @@ -1,1354 +1,1354 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0103 """Invenio BibEdit Engine.""" __revision__ = "$Id" from datetime import datetime import re import difflib import zlib from invenio import bibrecord from invenio import bibformat from invenio.jsonutils import json, CFG_JSON_AVAILABLE from invenio.urlutils import auto_version_url from invenio.bibedit_config import CFG_BIBEDIT_AJAX_RESULT_CODES, \ CFG_BIBEDIT_JS_CHECK_SCROLL_INTERVAL, CFG_BIBEDIT_JS_HASH_CHECK_INTERVAL, \ CFG_BIBEDIT_JS_CLONED_RECORD_COLOR, \ CFG_BIBEDIT_JS_CLONED_RECORD_COLOR_FADE_DURATION, \ CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR, \ CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR_FADE_DURATION, \ CFG_BIBEDIT_JS_NEW_CONTENT_COLOR, \ CFG_BIBEDIT_JS_NEW_CONTENT_COLOR_FADE_DURATION, \ CFG_BIBEDIT_JS_NEW_CONTENT_HIGHLIGHT_DELAY, \ CFG_BIBEDIT_JS_STATUS_ERROR_TIME, CFG_BIBEDIT_JS_STATUS_INFO_TIME, \ CFG_BIBEDIT_JS_TICKET_REFRESH_DELAY, CFG_BIBEDIT_MAX_SEARCH_RESULTS, \ CFG_BIBEDIT_TAG_FORMAT, CFG_BIBEDIT_AJAX_RESULT_CODES_REV, \ CFG_BIBEDIT_AUTOSUGGEST_TAGS, CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS,\ CFG_BIBEDIT_KEYWORD_TAXONOMY, CFG_BIBEDIT_KEYWORD_TAG, \ CFG_BIBEDIT_KEYWORD_RDFLABEL, CFG_BIBEDIT_MSG from invenio.config import CFG_SITE_LANG, CFG_DEVEL_SITE from invenio.bibedit_dblayer import get_name_tags_all, reserve_record_id, \ get_related_hp_changesets, get_hp_update_xml, delete_hp_change, \ get_record_last_modification_date, get_record_revision_author, \ get_marcxml_of_record_revision, delete_related_holdingpen_changes, \ get_record_revisions from invenio.bibedit_utils import cache_exists, 
cache_expired, \ create_cache_file, delete_cache_file, get_bibrecord, \ get_cache_file_contents, get_cache_mtime, get_record_templates, \ get_record_template, latest_record_revision, record_locked_by_other_user, \ record_locked_by_queue, save_xml_record, touch_cache_file, \ update_cache_file_contents, get_field_templates, get_marcxml_of_revision, \ revision_to_timestamp, timestamp_to_revision, \ get_record_revision_timestamps, record_revision_exists, \ can_record_have_physical_copies, extend_record_with_template, \ merge_record_with_template, record_xml_output, \ user_can_edit_record_collection from invenio.bibrecord import create_record, print_rec, record_add_field, \ record_add_subfield_into, record_delete_field, \ record_delete_subfield_from, \ record_modify_subfield, record_move_subfield, \ create_field, record_replace_field, record_move_fields, \ record_modify_controlfield, record_get_field_values, \ record_get_subfields, record_get_field_instances, record_add_fields, \ record_strip_empty_fields, record_strip_empty_volatile_subfields from invenio.config import CFG_BIBEDIT_PROTECTED_FIELDS, CFG_CERN_SITE, \ CFG_SITE_URL, CFG_SITE_RECORD, CFG_BIBEDIT_KB_SUBJECTS, \ CFG_BIBEDIT_KB_INSTITUTIONS, CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS from invenio.search_engine import record_exists, search_pattern from invenio.webuser import session_param_get, session_param_set from invenio.bibcatalog import bibcatalog_system from invenio.webpage import page from invenio.htmlutils import get_mathjax_header from invenio.textutils import wash_for_xml from invenio.bibknowledge import get_kbd_values_for_bibedit, get_kbr_values, \ get_kbt_items_for_bibedit, kb_exists from invenio.batchuploader_engine import perform_upload_check from invenio.bibcirculation_dblayer import get_number_copies, has_copies from invenio.bibcirculation_utils import create_item_details_url -from invenio.bibdocfile import BibRecDocs, InvenioWebSubmitFileError +from invenio.bibdocfile import BibRecDocs, 
InvenioBibDocFileError import invenio.template bibedit_templates = invenio.template.load('bibedit') re_revdate_split = re.compile('^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)') def get_empty_fields_templates(): """ Returning the templates of empty fields:: -an empty data field -an empty control field """ return [{ "name": "Empty field", "description": "The data field not containing any " + \ "information filled in", "tag" : "", "ind1" : "", "ind2" : "", "subfields" : [("","")], "isControlfield" : False },{ "name" : "Empty control field", "description" : "The controlfield not containing any " + \ "data or tag description", "isControlfield" : True, "tag" : "", "value" : "" }] def get_available_fields_templates(): """ A method returning all the available field templates Returns a list of descriptors. Each descriptor has the same structure as a full field descriptor inside the record """ templates = get_field_templates() result = get_empty_fields_templates() for template in templates: tplTag = template[3].keys()[0] field = template[3][tplTag][0] if (field[0] == []): # if the field is a controlField, add different structure result.append({ "name" : template[1], "description" : template[2], "isControlfield" : True, "tag" : tplTag, "value" : field[3] }) else: result.append({ "name": template[1], "description": template[2], "tag" : tplTag, "ind1" : field[1], "ind2" : field[2], "subfields" : field[0], "isControlfield" : False }) return result def perform_request_init(uid, ln, req, lastupdated): """Handle the initial request by adding menu and JavaScript to the page.""" errors = [] warnings = [] body = '' # Add script data. 
record_templates = get_record_templates() record_templates.sort() tag_names = get_name_tags_all() protected_fields = ['001'] protected_fields.extend(CFG_BIBEDIT_PROTECTED_FIELDS.split(',')) history_url = '"' + CFG_SITE_URL + '/admin/bibedit/bibeditadmin.py/history"' cern_site = 'false' if not CFG_JSON_AVAILABLE: title = 'Record Editor' body = '''Sorry, the record editor cannot operate when the `simplejson' module is not installed. Please see the INSTALL file.''' return page(title = title, body = body, errors = [], warnings = [], uid = uid, language = ln, navtrail = "", lastupdated = lastupdated, req = req) if CFG_CERN_SITE: cern_site = 'true' data = {'gRECORD_TEMPLATES': record_templates, 'gTAG_NAMES': tag_names, 'gPROTECTED_FIELDS': protected_fields, 'gSITE_URL': '"' + CFG_SITE_URL + '"', 'gSITE_RECORD': '"' + CFG_SITE_RECORD + '"', 'gHISTORY_URL': history_url, 'gCERN_SITE': cern_site, 'gHASH_CHECK_INTERVAL': CFG_BIBEDIT_JS_HASH_CHECK_INTERVAL, 'gCHECK_SCROLL_INTERVAL': CFG_BIBEDIT_JS_CHECK_SCROLL_INTERVAL, 'gSTATUS_ERROR_TIME': CFG_BIBEDIT_JS_STATUS_ERROR_TIME, 'gSTATUS_INFO_TIME': CFG_BIBEDIT_JS_STATUS_INFO_TIME, 'gCLONED_RECORD_COLOR': '"' + CFG_BIBEDIT_JS_CLONED_RECORD_COLOR + '"', 'gCLONED_RECORD_COLOR_FADE_DURATION': CFG_BIBEDIT_JS_CLONED_RECORD_COLOR_FADE_DURATION, 'gNEW_ADD_FIELD_FORM_COLOR': '"' + CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR + '"', 'gNEW_ADD_FIELD_FORM_COLOR_FADE_DURATION': CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR_FADE_DURATION, 'gNEW_CONTENT_COLOR': '"' + CFG_BIBEDIT_JS_NEW_CONTENT_COLOR + '"', 'gNEW_CONTENT_COLOR_FADE_DURATION': CFG_BIBEDIT_JS_NEW_CONTENT_COLOR_FADE_DURATION, 'gNEW_CONTENT_HIGHLIGHT_DELAY': CFG_BIBEDIT_JS_NEW_CONTENT_HIGHLIGHT_DELAY, 'gTICKET_REFRESH_DELAY': CFG_BIBEDIT_JS_TICKET_REFRESH_DELAY, 'gRESULT_CODES': CFG_BIBEDIT_AJAX_RESULT_CODES, 'gAUTOSUGGEST_TAGS' : CFG_BIBEDIT_AUTOSUGGEST_TAGS, 'gAUTOCOMPLETE_TAGS' : CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS.keys(), 'gKEYWORD_TAG' : '"' + CFG_BIBEDIT_KEYWORD_TAG + '"', 
'gAVAILABLE_KBS': get_available_kbs(), 'gTagsToAutocomplete': CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS } body += '\n' # Adding the information about field templates fieldTemplates = get_available_fields_templates() body += "\n" # Add scripts (the ordering is NOT irrelevant). scripts = ['jquery.jeditable.mini.js', 'jquery.hotkeys.js', 'json2.js', 'bibedit_display.js', 'bibedit_engine.js', 'bibedit_keys.js', 'bibedit_menu.js', 'bibedit_holdingpen.js', 'marcxml.js', 'bibedit_clipboard.js','jquery-ui.min.js'] for script in scripts: body += ' \n' % (CFG_SITE_URL, auto_version_url("js/" + script)) body += '' # Build page structure and menu. # rec = create_record(format_record(235, "xm"))[0] #oaiId = record_extract_oai_id(rec) body += bibedit_templates.menu() body += """
""" return body, errors, warnings def get_available_kbs(): """ Return list of KBs that are available in the system to be used with BibEdit """ kb_list = [CFG_BIBEDIT_KB_INSTITUTIONS, CFG_BIBEDIT_KB_SUBJECTS] available_kbs = [kb for kb in kb_list if kb_exists(kb)] return available_kbs def get_xml_comparison(header1, header2, xml1, xml2): """ Return diffs of two MARCXML records. """ return "".join(difflib.unified_diff(xml1.splitlines(1), xml2.splitlines(1), header1, header2)) def get_marcxml_of_revision_id(recid, revid): """ Return MARCXML string with corresponding to revision REVID (=RECID.REVDATE) of a record. Return empty string if revision does not exist. """ res = "" job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revid).groups() tmp_res = get_marcxml_of_record_revision(recid, job_date) if tmp_res: for row in tmp_res: res += zlib.decompress(row[0]) + "\n" return res def perform_request_compare(ln, recid, rev1, rev2): """Handle a request for comparing two records""" body = "" errors = [] warnings = [] if (not record_revision_exists(recid, rev1)) or \ (not record_revision_exists(recid, rev2)): body = "The requested record revision does not exist !" else: xml1 = get_marcxml_of_revision_id(recid, rev1) xml2 = get_marcxml_of_revision_id(recid, rev2) fullrevid1 = "%i.%s" % (recid, rev1) fullrevid2 = "%i.%s" % (recid, rev2) comparison = bibedit_templates.clean_value( get_xml_comparison(fullrevid1, fullrevid2, xml1, xml2), 'text').replace('\n', '
\n ') job_date1 = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(rev1).groups() job_date2 = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(rev2).groups() body += bibedit_templates.history_comparebox(ln, job_date1, job_date2, comparison) return body, errors, warnings def perform_request_newticket(recid, uid): """create a new ticket with this record's number @param recid: record id @param uid: user id @return: (error_msg, url) """ t_url = "" errmsg = "" if bibcatalog_system is not None: t_id = bibcatalog_system.ticket_submit(uid, "", recid, "") if t_id: #get the ticket's URL t_url = bibcatalog_system.ticket_get_attribute(uid, t_id, 'url_modify') else: errmsg = "ticket_submit failed" else: errmsg = "No ticket system configured" return (errmsg, t_url) def perform_request_ajax(req, recid, uid, data, isBulk = False, \ ln = CFG_SITE_LANG): """Handle Ajax requests by redirecting to appropriate function.""" response = {} request_type = data['requestType'] undo_redo = None if data.has_key("undoRedo"): undo_redo = data["undoRedo"] # Call function based on request type. if request_type == 'searchForRecord': # Search request. response.update(perform_request_search(data)) elif request_type in ['changeTagFormat']: # User related requests. response.update(perform_request_user(req, request_type, recid, data)) elif request_type in ('getRecord', 'submit', 'cancel', 'newRecord', 'deleteRecord', 'deleteRecordCache', 'prepareRecordMerge', 'revert'): # 'Major' record related requests. response.update(perform_request_record(req, request_type, recid, uid, data)) elif request_type in ('addField', 'addSubfields', \ 'addFieldsSubfieldsOnPositions', 'modifyContent', \ 'modifySubfieldTag', 'modifyFieldTag', \ 'moveSubfield', 'deleteFields', 'moveField', \ 'modifyField', 'otherUpdateRequest', \ 'disableHpChange', 'deactivateHoldingPenChangeset'): # Record updates. 
cacheMTime = data['cacheMTime'] if data.has_key('hpChanges'): hpChanges = data['hpChanges'] else: hpChanges = {} response.update(perform_request_update_record(request_type, recid, \ uid, cacheMTime, data, \ hpChanges, undo_redo, \ isBulk)) elif request_type in ('autosuggest', 'autocomplete', 'autokeyword'): response.update(perform_request_autocomplete(request_type, recid, uid, \ data)) elif request_type in ('getTickets', ): # BibCatalog requests. response.update(perform_request_bibcatalog(request_type, recid, uid)) elif request_type in ('getHoldingPenUpdates', ): response.update(perform_request_holdingpen(request_type, recid)) elif request_type in ('getHoldingPenUpdateDetails', \ 'deleteHoldingPenChangeset'): updateId = data['changesetNumber'] response.update(perform_request_holdingpen(request_type, recid, \ updateId)) elif request_type in ('applyBulkUpdates', ): # a general version of a bulk request changes = data['requestsData'] cacheMTime = data['cacheMTime'] response.update(perform_bulk_request_ajax(req, recid, uid, changes, \ undo_redo, cacheMTime)) elif request_type in ('preview', ): response.update(perform_request_preview_record(request_type, recid, uid, data)) elif request_type in ('get_pdf_url', ): response.update(perform_request_get_pdf_url(recid)) elif request_type in ('record_has_pdf', ): response.update(perform_request_record_has_pdf(recid, uid)) return response def perform_bulk_request_ajax(req, recid, uid, reqsData, undoRedo, cacheMTime): """ An AJAX handler used when treating bulk updates """ lastResult = {} lastTime = cacheMTime isFirst = True for data in reqsData: assert data != None data['cacheMTime'] = lastTime if isFirst and undoRedo != None: # we add the undo/redo handler to the first operation in order to # save the handler on the server side ! data['undoRedo'] = undoRedo isFirst = False lastResult = perform_request_ajax(req, recid, uid, data, True) # now we have to update the cacheMtime in next request ! 
# if lastResult.has_key('cacheMTime'): try: lastTime = lastResult['cacheMTime'] except: raise Exception(str(lastResult)) return lastResult def perform_request_search(data): """Handle search requests.""" response = {} searchType = data['searchType'] if searchType is None: searchType = "anywhere" searchPattern = data['searchPattern'] if searchType == 'anywhere': pattern = searchPattern else: pattern = searchType + ':' + searchPattern result_set = list(search_pattern(p=pattern)) response['resultCode'] = 1 response['resultSet'] = result_set[0:CFG_BIBEDIT_MAX_SEARCH_RESULTS] return response def perform_request_user(req, request_type, recid, data): """Handle user related requests.""" response = {} if request_type == 'changeTagFormat': try: tagformat_settings = session_param_get(req, 'bibedit_tagformat') except KeyError: tagformat_settings = {} tagformat_settings[recid] = data['tagFormat'] session_param_set(req, 'bibedit_tagformat', tagformat_settings) response['resultCode'] = 2 return response def perform_request_holdingpen(request_type, recId, changeId=None): """ A method performing the holdingPen ajax request. 
The following types of requests can be made:: -getHoldingPenUpdates: retrieving the holding pen updates pending for a given record """ response = {} if request_type == 'getHoldingPenUpdates': changeSet = get_related_hp_changesets(recId) changes = [] for change in changeSet: changes.append((str(change[0]), str(change[1]))) response["changes"] = changes elif request_type == 'getHoldingPenUpdateDetails': # returning the list of changes related to the holding pen update # the format based on what the record difference xtool returns assert(changeId != None) hpContent = get_hp_update_xml(changeId) holdingPenRecord = create_record(hpContent[0], "xm")[0] # databaseRecord = get_record(hpContent[1]) response['record'] = holdingPenRecord response['changeset_number'] = changeId elif request_type == 'deleteHoldingPenChangeset': assert(changeId != None) delete_hp_change(changeId) return response def perform_request_record(req, request_type, recid, uid, data, ln=CFG_SITE_LANG): """Handle 'major' record related requests like fetching, submitting or deleting a record, cancel editing or preparing a record for merging. """ response = {} if request_type == 'newRecord': # Create a new record. new_recid = reserve_record_id() new_type = data['newType'] if new_type == 'empty': # Create a new empty record. create_cache_file(recid, uid) response['resultCode'], response['newRecID'] = 6, new_recid elif new_type == 'template': # Create a new record from XML record template. template_filename = data['templateFilename'] template = get_record_template(template_filename) if not template: response['resultCode'] = 108 else: record = create_record(template)[0] if not record: response['resultCode'] = 109 else: record_add_field(record, '001', controlfield_value=str(new_recid)) create_cache_file(new_recid, uid, record, True) response['resultCode'], response['newRecID'] = 7, new_recid elif new_type == 'clone': # Clone an existing record (from the users cache). 
existing_cache = cache_exists(recid, uid) if existing_cache: try: record = get_cache_file_contents(recid, uid)[2] except: # if, for example, the cache format was wrong (outdated) record = get_bibrecord(recid) else: # Cache missing. Fall back to using original version. record = get_bibrecord(recid) record_delete_field(record, '001') record_add_field(record, '001', controlfield_value=str(new_recid)) create_cache_file(new_recid, uid, record, True) response['resultCode'], response['newRecID'] = 8, new_recid elif request_type == 'getRecord': # Fetch the record. Possible error situations: # - Non-existing record # - Deleted record # - Record locked by other user # - Record locked by queue # A cache file will be created if it does not exist. # If the cache is outdated (i.e., not based on the latest DB revision), # cacheOutdated will be set to True in the response. record_status = record_exists(recid) existing_cache = cache_exists(recid, uid) read_only_mode = False if data.has_key("inReadOnlyMode"): read_only_mode = data['inReadOnlyMode'] if record_status == 0: response['resultCode'] = 102 elif record_status == -1: response['resultCode'] = 103 elif not read_only_mode and not existing_cache and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif not read_only_mode and existing_cache and \ cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif not read_only_mode and record_locked_by_queue(recid): response['resultCode'] = 105 else: if data.get('deleteRecordCache'): delete_cache_file(recid, uid) existing_cache = False pending_changes = [] disabled_hp_changes = {} if read_only_mode: if data.has_key('recordRevision') and data['recordRevision'] != 'sampleValue': record_revision_ts = data['recordRevision'] record_xml = get_marcxml_of_revision(recid, \ record_revision_ts) record = create_record(record_xml)[0] record_revision = timestamp_to_revision(record_revision_ts) pending_changes = [] 
disabled_hp_changes = {} else: # a normal cacheless retrieval of a record record = get_bibrecord(recid) record_revision = get_record_last_modification_date(recid) if record_revision == None: record_revision = datetime.now().timetuple() pending_changes = [] disabled_hp_changes = {} cache_dirty = False mtime = 0 undo_list = [] redo_list = [] elif not existing_cache: record_revision, record = create_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) pending_changes = [] disabled_hp_changes = {} undo_list = [] redo_list = [] cache_dirty = False else: #TODO: This try except should be replaced with something nicer, # like an argument indicating if a new cache file is to # be created try: cache_dirty, record_revision, record, pending_changes, \ disabled_hp_changes, undo_list, redo_list = \ get_cache_file_contents(recid, uid) touch_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) if not latest_record_revision(recid, record_revision) and \ get_record_revisions(recid) != (): # This sould prevent from using old cache in case of # viewing old version. 
If there are no revisions, # it means we should skip this step because this # is a new record response['cacheOutdated'] = True except: record_revision, record = create_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) pending_changes = [] disabled_hp_changes = {} cache_dirty = False undo_list = [] redo_list = [] if data.get('clonedRecord',''): response['resultCode'] = 9 else: response['resultCode'] = 3 revision_author = get_record_revision_author(recid, record_revision) latest_revision = get_record_last_modification_date(recid) if latest_revision == None: latest_revision = datetime.now().timetuple() last_revision_ts = revision_to_timestamp(latest_revision) revisions_history = get_record_revision_timestamps(recid) number_of_physical_copies = get_number_copies(recid) bibcirc_details_URL = create_item_details_url(recid, ln) can_have_copies = can_record_have_physical_copies(recid) # For some collections, merge template with record template_to_merge = extend_record_with_template(recid) if template_to_merge: record = merge_record_with_template(record, template_to_merge) create_cache_file(recid, uid, record, True) response['cacheDirty'], response['record'], \ response['cacheMTime'], response['recordRevision'], \ response['revisionAuthor'], response['lastRevision'], \ response['revisionsHistory'], response['inReadOnlyMode'], \ response['pendingHpChanges'], response['disabledHpChanges'], \ response['undoList'], response['redoList'] = cache_dirty, \ record, mtime, revision_to_timestamp(record_revision), \ revision_author, last_revision_ts, revisions_history, \ read_only_mode, pending_changes, disabled_hp_changes, \ undo_list, redo_list response['numberOfCopies'] = number_of_physical_copies response['bibCirculationUrl'] = bibcirc_details_URL response['canRecordHavePhysicalCopies'] = can_have_copies # Set tag format from user's session settings. 
try: tagformat_settings = session_param_get(req, 'bibedit_tagformat') tagformat = tagformat_settings[recid] except KeyError: tagformat = CFG_BIBEDIT_TAG_FORMAT response['tagFormat'] = tagformat # KB information response['KBSubject'] = CFG_BIBEDIT_KB_SUBJECTS response['KBInstitution'] = CFG_BIBEDIT_KB_INSTITUTIONS elif request_type == 'submit': # Submit the record. Possible error situations: # - Missing cache file # - Cache file modified in other editor # - Record locked by other user # - Record locked by queue # - Invalid XML characters # If the cache is outdated cacheOutdated will be set to True in the # response. if not cache_exists(recid, uid): response['resultCode'] = 106 elif not get_cache_mtime(recid, uid) == data['cacheMTime']: response['resultCode'] = 107 elif cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif record_locked_by_queue(recid): response['resultCode'] = 105 else: try: tmp_result = get_cache_file_contents(recid, uid) record_revision = tmp_result[1] record = tmp_result[2] pending_changes = tmp_result[3] # disabled_changes = tmp_result[4] xml_record = wash_for_xml(print_rec(record)) record, status_code, list_of_errors = create_record(xml_record) # Simulate upload to catch errors errors_upload = perform_upload_check(xml_record, '--replace') if not user_can_edit_record_collection(req, recid): errors_upload += CFG_BIBEDIT_MSG["not_authorised"] if errors_upload: response['resultCode'], response['errors'] = 113, \ errors_upload elif status_code == 0: response['resultCode'], response['errors'] = 110, \ list_of_errors elif not data['force'] and \ not latest_record_revision(recid, record_revision): response['cacheOutdated'] = True if CFG_DEVEL_SITE: response['record_revision'] = record_revision.__str__() response['newest_record_revision'] = \ get_record_last_modification_date(recid).__str__() else: save_xml_record(recid, uid) response['resultCode'] = 4 except Exception, e: response['resultCode'] = 
CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'error_wrong_cache_file_format'] if CFG_DEVEL_SITE: # return debug information in the request response['exception_message'] = e.__str__() elif request_type == 'revert': revId = data['revId'] job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups() revision_xml = get_marcxml_of_revision(recid, job_date) save_xml_record(recid, uid, revision_xml) if (cache_exists(recid, uid)): delete_cache_file(recid, uid) response['resultCode'] = 4 elif request_type == 'cancel': # Cancel editing by deleting the cache file. Possible error situations: # - Cache file modified in other editor if cache_exists(recid, uid): if get_cache_mtime(recid, uid) == data['cacheMTime']: delete_cache_file(recid, uid) response['resultCode'] = 5 else: response['resultCode'] = 107 else: response['resultCode'] = 5 elif request_type == 'deleteRecord': # Submit the record. Possible error situations: # - Record locked by other user # - Record locked by queue # As the user is requesting deletion we proceed even if the cache file # is missing and we don't check if the cache is outdated or has # been modified in another editor. 
existing_cache = cache_exists(recid, uid) pending_changes = [] if has_copies(recid): response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_physical_copies_exist'] elif existing_cache and cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_rec_locked_by_user'] elif record_locked_by_queue(recid): response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_rec_locked_by_queue'] else: if not existing_cache: record_revision, record, pending_changes, \ deactivated_hp_changes, undo_list, redo_list = \ create_cache_file(recid, uid) else: try: record_revision, record, pending_changes, \ deactivated_hp_changes, undo_list, redo_list = \ get_cache_file_contents(recid, uid)[1:] except: record_revision, record, pending_changes, \ deactivated_hp_changes = create_cache_file(recid, uid) record_add_field(record, '980', ' ', ' ', '', [('c', 'DELETED')]) undo_list = [] redo_list = [] update_cache_file_contents(recid, uid, record_revision, record, \ pending_changes, \ deactivated_hp_changes, undo_list, \ redo_list) save_xml_record(recid, uid) delete_related_holdingpen_changes(recid) # we don't need any changes # related to a deleted record response['resultCode'] = 10 elif request_type == 'deleteRecordCache': # Delete the cache file. Ignore the request if the cache has been # modified in another editor. if data.has_key('cacheMTime'): if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == \ data['cacheMTime']: delete_cache_file(recid, uid) response['resultCode'] = 11 elif request_type == 'prepareRecordMerge': # We want to merge the cache with the current DB version of the record, # so prepare an XML file from the file cache, to be used by BibMerge. 
# Possible error situations: # - Missing cache file # - Record locked by other user # - Record locked by queue # We don't check if cache is outdated (a likely scenario for this # request) or if it has been modified in another editor. if not cache_exists(recid, uid): response['resultCode'] = 106 elif cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif record_locked_by_queue(recid): response['resultCode'] = 105 else: save_xml_record(recid, uid, to_upload=False, to_merge=True) response['resultCode'] = 12 return response def perform_request_update_record(request_type, recid, uid, cacheMTime, data, \ hpChanges, undoRedoOp, isBulk=False): """ Handle record update requests like adding, modifying, moving or deleting of fields or subfields. Possible common error situations:: - Missing cache file - Cache file modified in other editor @param undoRedoOp: Indicates in "undo"/"redo"/undo_descriptor operation is performed by a current request. """ response = {} if not cache_exists(recid, uid): response['resultCode'] = 106 elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False: # In case of a bulk request, the changes are deliberately performed # immediately one after another response['resultCode'] = 107 else: try: record_revision, record, pending_changes, deactivated_hp_changes, \ undo_list, redo_list = get_cache_file_contents(recid, uid)[1:] except: response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'error_wrong_cache_file_format'] return response # process all the Holding Pen changes operations ... 
regardles the # request type # import rpdb2; # rpdb2.start_embedded_debugger('password', fAllowRemote=True) if hpChanges.has_key("toDisable"): for changeId in hpChanges["toDisable"]: pending_changes[changeId]["applied_change"] = True if hpChanges.has_key("toEnable"): for changeId in hpChanges["toEnable"]: pending_changes[changeId]["applied_change"] = False if hpChanges.has_key("toOverride"): pending_changes = hpChanges["toOverride"] if hpChanges.has_key("changesetsToDeactivate"): for changesetId in hpChanges["changesetsToDeactivate"]: deactivated_hp_changes[changesetId] = True if hpChanges.has_key("changesetsToActivate"): for changesetId in hpChanges["changesetsToActivate"]: deactivated_hp_changes[changesetId] = False # processing the undo/redo entries if undoRedoOp == "undo": try: redo_list = [undo_list[-1]] + redo_list undo_list = undo_list[:-1] except: raise Exception("An exception occured when undoing previous" + \ " operation. Undo list: " + str(undo_list) + \ " Redo list " + str(redo_list)) elif undoRedoOp == "redo": try: undo_list = undo_list + [redo_list[0]] redo_list = redo_list[1:] except: raise Exception("An exception occured when redoing previous" + \ " operation. Undo list: " + str(undo_list) + \ " Redo list " + str(redo_list)) else: # This is a genuine operation - we have to add a new descriptor # to the undo list and cancel the redo unless the operation is # a bulk operation if undoRedoOp != None: undo_list = undo_list + [undoRedoOp] redo_list = [] else: assert isBulk == True field_position_local = data.get('fieldPosition') if field_position_local is not None: field_position_local = int(field_position_local) if request_type == 'otherUpdateRequest': # An empty request. 
Might be useful if we want to perform # operations that require only the actions performed globally, # like modifying the holdingPen changes list response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'editor_modifications_changed'] elif request_type == 'deactivateHoldingPenChangeset': # the changeset has been marked as processed ( user applied it in # the editor). Marking as used in the cache file. # CAUTION: This function has been implemented here because logically # it fits with the modifications made to the cache file. # No changes are made to the Holding Pen physically. The # changesets are related to the cache because we want to # cancel the removal every time the cache disappears for # any reason response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'disabled_hp_changeset'] elif request_type == 'addField': if data['controlfield']: record_add_field(record, data['tag'], controlfield_value=data['value']) response['resultCode'] = 20 else: record_add_field(record, data['tag'], data['ind1'], data['ind2'], subfields=data['subfields'], field_position_local=field_position_local) response['resultCode'] = 21 elif request_type == 'addSubfields': subfields = data['subfields'] for subfield in subfields: record_add_subfield_into(record, data['tag'], subfield[0], subfield[1], subfield_position=None, field_position_local=field_position_local) if len(subfields) == 1: response['resultCode'] = 22 else: response['resultCode'] = 23 elif request_type == 'addFieldsSubfieldsOnPositions': #1) Sorting the fields by their identifiers fieldsToAdd = data['fieldsToAdd'] subfieldsToAdd = data['subfieldsToAdd'] for tag in fieldsToAdd.keys(): positions = fieldsToAdd[tag].keys() positions.sort() for position in positions: # now adding fields at a position isControlfield = (len(fieldsToAdd[tag][position][0]) == 0) # if there are n subfields, this is a control field if isControlfield: controlfieldValue = fieldsToAdd[tag][position][3] record_add_field(record, tag, 
field_position_local = \ int(position), \ controlfield_value = \ controlfieldValue) else: subfields = fieldsToAdd[tag][position][0] ind1 = fieldsToAdd[tag][position][1] ind2 = fieldsToAdd[tag][position][2] record_add_field(record, tag, ind1, ind2, subfields = \ subfields, field_position_local = \ int(position)) # now adding the subfields for tag in subfieldsToAdd.keys(): for fieldPosition in subfieldsToAdd[tag].keys(): #now the fields #order not important ! subfieldsPositions = subfieldsToAdd[tag][fieldPosition]. \ keys() subfieldsPositions.sort() for subfieldPosition in subfieldsPositions: subfield = subfieldsToAdd[tag][fieldPosition]\ [subfieldPosition] record_add_subfield_into(record, tag, subfield[0], \ subfield[1], \ subfield_position = \ int(subfieldPosition), \ field_position_local = \ int(fieldPosition)) response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['added_positioned_subfields'] elif request_type == 'modifyField': # changing the field structure # first remove subfields and then add new... 
change the indices subfields = data['subFields'] # parse the JSON representation of # the subfields here new_field = create_field(subfields, data['ind1'], data['ind2']) record_replace_field(record, data['tag'], new_field, \ field_position_local = data['fieldPosition']) response['resultCode'] = 26 elif request_type == 'modifyContent': if data['subfieldIndex'] != None: record_modify_subfield(record, data['tag'], data['subfieldCode'], data['value'], int(data['subfieldIndex']), field_position_local=field_position_local) else: record_modify_controlfield(record, data['tag'], data["value"], field_position_local=field_position_local) response['resultCode'] = 24 elif request_type == 'modifySubfieldTag': record_add_subfield_into(record, data['tag'], data['subfieldCode'], data["value"], subfield_position= int(data['subfieldIndex']), field_position_local=field_position_local) record_delete_subfield_from(record, data['tag'], int(data['subfieldIndex']) + 1, field_position_local=field_position_local) response['resultCode'] = 24 elif request_type == 'modifyFieldTag': subfields = record_get_subfields(record, data['oldTag'], field_position_local=field_position_local) record_add_field(record, data['newTag'], data['ind1'], data['ind2'] , subfields=subfields) record_delete_field(record, data['oldTag'], ind1=data['oldInd1'], \ ind2=data['oldInd2'], field_position_local=field_position_local) response['resultCode'] = 32 elif request_type == 'moveSubfield': record_move_subfield(record, data['tag'], int(data['subfieldIndex']), int(data['newSubfieldIndex']), field_position_local=field_position_local) response['resultCode'] = 25 elif request_type == 'moveField': if data['direction'] == 'up': final_position_local = field_position_local-1 else: # direction is 'down' final_position_local = field_position_local+1 record_move_fields(record, data['tag'], [field_position_local], final_position_local) response['resultCode'] = 32 elif request_type == 'deleteFields': to_delete = data['toDelete'] 
deleted_fields = 0 deleted_subfields = 0 for tag in to_delete: #Sorting the fields in a edcreasing order by the local position! fieldsOrder = to_delete[tag].keys() fieldsOrder.sort(lambda a, b: int(b) - int(a)) for field_position_local in fieldsOrder: if not to_delete[tag][field_position_local]: # No subfields specified - delete entire field. record_delete_field(record, tag, field_position_local=int(field_position_local)) deleted_fields += 1 else: for subfield_position in \ to_delete[tag][field_position_local][::-1]: # Delete subfields in reverse order (to keep the # indexing correct). record_delete_subfield_from(record, tag, int(subfield_position), field_position_local=int(field_position_local)) deleted_subfields += 1 if deleted_fields == 1 and deleted_subfields == 0: response['resultCode'] = 26 elif deleted_fields and deleted_subfields == 0: response['resultCode'] = 27 elif deleted_subfields == 1 and deleted_fields == 0: response['resultCode'] = 28 elif deleted_subfields and deleted_fields == 0: response['resultCode'] = 29 else: response['resultCode'] = 30 response['cacheMTime'], response['cacheDirty'] = \ update_cache_file_contents(recid, uid, record_revision, record, \ pending_changes, \ deactivated_hp_changes, \ undo_list, redo_list), \ True return response def perform_request_autocomplete(request_type, recid, uid, data): """ Perfrom an AJAX request associated with the retrieval of autocomplete data. 
@param request_type: Type of the currently served request @param recid: the identifer of the record @param uid: The identifier of the user being currently logged in @param data: The request data containing possibly important additional arguments """ response = {} # get the values based on which one needs to search searchby = data['value'] #we check if the data is properly defined fulltag = '' if data.has_key('maintag') and data.has_key('subtag1') and \ data.has_key('subtag2') and data.has_key('subfieldcode'): maintag = data['maintag'] subtag1 = data['subtag1'] subtag2 = data['subtag2'] u_subtag1 = subtag1 u_subtag2 = subtag2 if (not subtag1) or (subtag1 == ' '): u_subtag1 = '_' if (not subtag2) or (subtag2 == ' '): u_subtag2 = '_' subfieldcode = data['subfieldcode'] fulltag = maintag+u_subtag1+u_subtag2+subfieldcode if (request_type == 'autokeyword'): #call the keyword-form-ontology function if fulltag and searchby: items = get_kbt_items_for_bibedit(CFG_BIBEDIT_KEYWORD_TAXONOMY, \ CFG_BIBEDIT_KEYWORD_RDFLABEL, \ searchby) response['autokeyword'] = items if (request_type == 'autosuggest'): #call knowledge base function to put the suggestions in an array.. if fulltag and searchby and len(searchby) > 3: #add trailing '*' wildcard for 'search_unit_in_bibxxx()' if not already present suggest_values = get_kbd_values_for_bibedit(fulltag, "", searchby+"*") #remove .. new_suggest_vals = [] for sugg in suggest_values: if sugg.startswith(searchby): new_suggest_vals.append(sugg) response['autosuggest'] = new_suggest_vals if (request_type == 'autocomplete'): #call the values function with the correct kb_name if CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS.has_key(fulltag): kbname = CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS[fulltag] #check if the seachby field has semicolons. Take all #the semicolon-separated items.. 
items = [] vals = [] if searchby: if searchby.rfind(';'): items = searchby.split(';') else: items = [searchby.strip()] for item in items: item = item.strip() kbrvals = get_kbr_values(kbname, item, '', 'e') #we want an exact match if kbrvals and kbrvals[0]: #add the found val into vals vals.append(kbrvals[0]) #check that the values are not already contained in other #instances of this field record = get_cache_file_contents(recid, uid)[2] xml_rec = wash_for_xml(print_rec(record)) record, status_code, dummy_errors = create_record(xml_rec) existing_values = [] if (status_code != 0): existing_values = record_get_field_values(record, maintag, subtag1, subtag2, subfieldcode) #get the new values.. i.e. vals not in existing new_vals = vals for val in new_vals: if val in existing_values: new_vals.remove(val) response['autocomplete'] = new_vals response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['autosuggestion_scanned'] return response def perform_request_bibcatalog(request_type, recid, uid): """Handle request to BibCatalog (RT). """ response = {} if request_type == 'getTickets': # Insert the ticket data in the response, if possible if bibcatalog_system is None: response['tickets'] = "" elif bibcatalog_system and uid: bibcat_resp = bibcatalog_system.check_system(uid) if bibcat_resp == "": tickets_found = bibcatalog_system.ticket_search(uid, \ status=['new', 'open'], recordid=recid) t_url_str = '' #put ticket urls here, formatted for HTML display for t_id in tickets_found: #t_url = bibcatalog_system.ticket_get_attribute(uid, \ # t_id, 'url_display') ticket_info = bibcatalog_system.ticket_get_info( \ uid, t_id, ['url_display', 'url_close']) t_url = ticket_info['url_display'] t_close_url = ticket_info['url_close'] #format.. t_url_str += "#" + str(t_id) + '[read] [close]
' #put ticket header and tickets links in the box t_url_str = "Tickets
" + t_url_str + \ "
" + '" response['resultCode'] = 31 return response def perform_request_preview_record(request_type, recid, uid, data): """ Handle request to preview record with formatting """ response = {} if request_type == "preview": if cache_exists(recid, uid): dummy1, dummy2, record, dummy3, dummy4, dummy5, dummy6 = get_cache_file_contents(recid, uid) else: record = get_bibrecord(recid) # clean the record from unfilled volatile fields record_strip_empty_volatile_subfields(record) record_strip_empty_fields(record) response['html_preview'] = _get_formated_record(record, data['new_window']) # clean the record from unfilled volatile fields record_strip_empty_volatile_subfields(record) record_strip_empty_fields(record) response['html_preview'] = _get_formated_record(record, data['new_window']) return response def perform_request_get_pdf_url(recid): """ Handle request to get the URL of the attached PDF """ response = {} rec_info = BibRecDocs(recid) docs = rec_info.list_bibdocs() try: doc = docs[0] response['pdf_url'] = doc.get_file('pdf').get_url() - except (IndexError, InvenioWebSubmitFileError): + except (IndexError, InvenioBibDocFileError): # FIXME, return here some information about error. 
# We could allow the user to specify a URl and add the FFT tags automatically response['pdf_url'] = '' return response def perform_request_record_has_pdf(recid, uid): """ Check if record has a pdf attached """ rec_info = BibRecDocs(recid) docs = rec_info.list_bibdocs() return {'record_has_pdf': bool(docs)} def _get_formated_record(record, new_window): """Returns a record in a given format @param record: BibRecord object @param new_window: Boolean, indicates if it is needed to add all the headers to the page (used when clicking Preview button) """ from invenio.config import CFG_WEBSTYLE_TEMPLATE_SKIN xml_record = wash_for_xml(bibrecord.record_xml_output(record)) result = '' if new_window: result = """ Record preview """%{'cssurl': CFG_SITE_URL, 'cssskin': CFG_WEBSTYLE_TEMPLATE_SKIN != 'default' and '_' + CFG_WEBSTYLE_TEMPLATE_SKIN or '' } result += get_mathjax_header(True) + '' result += "

Brief format preview


" result += bibformat.format_record(recID=None, of="hb", xml_record=xml_record) + "
" result += "

Detailed format preview


" result += bibformat.format_record(recID=None, of="hd", xml_record=xml_record) #Preview references result += "

References


" result += bibformat.format_record(0, 'hdref', xml_record=xml_record) result += """ """ if new_window: result += "" return result ########### Functions related to templates web interface ############# def perform_request_init_template_interface(): """Handle a request to manage templates""" errors = [] warnings = [] body = '' # Add script data. record_templates = get_record_templates() record_templates.sort() data = {'gRECORD_TEMPLATES': record_templates, 'gSITE_RECORD': '"' + CFG_SITE_RECORD + '"', 'gSITE_URL': '"' + CFG_SITE_URL + '"'} body += '\n' # Add scripts (the ordering is NOT irrelevant). scripts = ['jquery-ui.min.js', 'json2.js', 'bibedit_display.js', 'bibedit_template_interface.js'] for script in scripts: body += ' \n' % (CFG_SITE_URL, script) body += '
\n' body += '
\n' return body, errors, warnings def perform_request_ajax_template_interface(data): """Handle Ajax requests by redirecting to appropriate function.""" response = {} request_type = data['requestType'] if request_type == 'editTemplate': # Edit a template request. response.update(perform_request_edit_template(data)) return response def perform_request_edit_template(data): """ Handle request to edit a template """ response = {} template_filename = data['templateFilename'] template = get_record_template(template_filename) if not template: response['resultCode'] = 1 else: response['templateMARCXML'] = template return response diff --git a/modules/bibformat/lib/elements/bfe_edit_files.py b/modules/bibformat/lib/elements/bfe_edit_files.py index af88d9405..9f61dc409 100644 --- a/modules/bibformat/lib/elements/bfe_edit_files.py +++ b/modules/bibformat/lib/elements/bfe_edit_files.py @@ -1,59 +1,59 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""BibFormat element - Prints a link to BibDocFile """ __revision__ = "$Id$" from invenio.urlutils import create_html_link from invenio.messages import gettext_set_language -from invenio.config import CFG_SITE_URL +from invenio.config import CFG_SITE_URL, CFG_SITE_RECORD from invenio.access_control_engine import acc_authorize_action def format_element(bfo, style): """ Prints a link to simple file management interface (BibDocFile), if authorization is granted. @param style: the CSS style to be applied to the link. """ _ = gettext_set_language(bfo.lang) out = "" user_info = bfo.user_info (auth_code, auth_message) = acc_authorize_action(user_info, 'runbibdocfile') if auth_code == 0: linkattrd = {} if style != '': linkattrd['style'] = style - out += create_html_link(CFG_SITE_URL + '/submit/managedocfiles', + out += create_html_link(CFG_SITE_URL + '/%s/managedocfiles' % CFG_SITE_RECORD, urlargd={'ln': bfo.lang, 'recid': str(bfo.recID)}, link_label=_("Manage Files of This Record"), linkattrd=linkattrd) return out def escape_values(bfo): """ Called by BibFormat in order to check if output of this element should be escaped. """ return 0 diff --git a/modules/bibformat/lib/elements/bfe_fulltext.py b/modules/bibformat/lib/elements/bfe_fulltext.py index a9ba5be1b..eff1193ba 100644 --- a/modules/bibformat/lib/elements/bfe_fulltext.py +++ b/modules/bibformat/lib/elements/bfe_fulltext.py @@ -1,314 +1,314 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints a links to fulltext """ __revision__ = "$Id$" import re from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, compose_format from invenio.messages import gettext_set_language from invenio.config import CFG_SITE_URL, CFG_CERN_SITE, CFG_SITE_RECORD, \ CFG_BIBFORMAT_HIDDEN_FILE_FORMATS -from invenio.websubmit_config import CFG_WEBSUBMIT_ICON_SUBFORMAT_RE +from invenio.bibdocfile_config import CFG_BIBDOCFILE_ICON_SUBFORMAT_RE from cgi import escape, parse_qs from urlparse import urlparse from os.path import basename import urllib _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS = set(normalize_format(fmt) for fmt in CFG_BIBFORMAT_HIDDEN_FILE_FORMATS) cern_arxiv_categories = ["astro-ph", "chao-dyn", "cond-mat", "gr-qc", "hep-ex", "hep-lat", "hep-ph", "hep-th", "math-ph", "math", "nucl-ex", "nucl-th", "out", "physics", "quant-ph", "q-alg", "cs", "adap-org", "comp-gas", "chem-ph", "cs", "math", "neuro-sys", "patt-sol", "solv-int", "acc-phys", "alg-geom", "ao-sci", "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th", "plasm-ph", "q-alg", "supr-con"] def format_element(bfo, style, separator='; ', show_icons='no', focus_on_main_file='no', show_subformat_icons='no'): """ This is the default format for formatting fulltext links. When possible, it returns only the main file(s) (+ link to additional files if needed). If no distinction is made at submission time between main and additional files, returns all the files @param separator: the separator between urls. @param style: CSS class of the link @param show_icons: if 'yes', print icons for fulltexts @param focus_on_main_file: if 'yes' and a doctype 'Main' is found, prominently display this doctype. 
In that case other doctypes are summarized with a link to the Files tab, named "Additional files" @param show_subformat_icons: shall we display subformats considered as icons? """ _ = gettext_set_language(bfo.lang) out = '' # Retrieve files (parsed_urls, old_versions, additionals) = get_files(bfo, \ distinguish_main_and_additional_files=focus_on_main_file.lower() == 'yes', include_subformat_icons=show_subformat_icons == 'yes') main_urls = parsed_urls['main_urls'] others_urls = parsed_urls['others_urls'] if parsed_urls.has_key('cern_urls'): cern_urls = parsed_urls['cern_urls'] # Prepare style and icon if style != "": style = 'class="'+style+'"' if show_icons.lower() == 'yes': file_icon = '%s' % (CFG_SITE_URL, _("Download fulltext")) else: file_icon = '' # Build urls list. # Escape special chars for
tag value. additional_str = '' if additionals: additional_str = ' (%s)' % _("additional files") versions_str = '' #if old_versions: #versions_str = ' (%s)' % _("older versions") if main_urls: main_urls_keys = sort_alphanumerically(main_urls.keys()) for descr in main_urls_keys: urls = main_urls[descr] if re.match(r'^\d+\s', descr) and urls[0][2] == 'png': # FIXME: we have probably hit a Plot (as link # description looks like '0001 This is Caption'), so # do not take it. This test is not ideal, we should # rather study doc type, and base ourselves on # Main/Additional/Plot etc. continue out += "%s: " % descr urls_dict = {} for url, name, url_format in urls: if name not in urls_dict: urls_dict[name] = [(url, url_format)] else: urls_dict[name].append((url, url_format)) for name, urls_and_format in urls_dict.items(): if len(urls_dict) > 1: print_name = "%s - " % name url_list = [print_name] else: url_list = [] for url, url_format in urls_and_format: if CFG_CERN_SITE and url_format == 'ps.gz' and len(urls_and_format) > 1: ## We skip old PS.GZ files continue url_list.append('%(file_icon)s%(url_format)s' % { 'style': style, 'url': escape(url, True), 'file_icon': file_icon, 'url_format': escape(url_format.upper()) }) out += " ".join(url_list) + additional_str + versions_str + separator if CFG_CERN_SITE and cern_urls: link_word = len(cern_urls) == 1 and _('%(x_sitename)s link') or _('%(x_sitename)s links') out += '%s: ' % (link_word % {'x_sitename': 'CERN'}) url_list = [] for url, descr in cern_urls: url_list.append(''+ \ file_icon + escape(str(descr))+'') out += separator.join(url_list) if others_urls: external_link = len(others_urls) == 1 and _('external link') or _('external links') out += '%s: ' % external_link.capitalize() url_list = [] for url, descr in others_urls: url_list.append(''+ \ file_icon + escape(str(descr))+'') out += separator.join(url_list) + '
' if out.endswith('
'): out = out[:-len('
')] # When exported to text (eg. in WebAlert emails) we do not want to # display the link to the fulltext: if out: out = '' + out + '' return out def escape_values(bfo): """ Called by BibFormat in order to check if output of this element should be escaped. """ return 0 def get_files(bfo, distinguish_main_and_additional_files=True, include_subformat_icons=False): """ Returns the files available for the given record. Returned structure is a tuple (parsed_urls, old_versions, additionals): - parsed_urls: contains categorized URLS (see details below) - old_versions: set to True if we can have access to old versions - additionals: set to True if we have other documents than the 'main' document Parameter 'include_subformat_icons' decides if subformat considered as icons should be returned 'parsed_urls' is a dictionary in the form:: {'main_urls' : {'Main' : [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.pdf', 'aFile', 'PDF'), ('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/aFile.gif', 'aFile', 'GIF')], 'Additional': [('http://CFG_SITE_URL/CFG_SITE_RECORD/1/files/bFile.pdf', 'bFile', 'PDF')]}, 'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y) ('http://externalurl.com/bFile.pdf', 'Fulltext')], 'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y) ('http://cern.ch/bFile.pdf', 'Fulltext')], } Some notes about returned structure: - key 'cern_urls' is only available on CERN site - keys in main_url dictionaries are defined by the BibDoc. - older versions are not part of the parsed urls - returns only main files when possible, that is when doctypes make a distinction between 'Main' files and other files. Otherwise returns all the files as main. This is only enabled if distinguish_main_and_additional_files is set to True """ _ = gettext_set_language(bfo.lang) urls = bfo.fields("8564_") bibarchive = BibRecDocs(bfo.recID) old_versions = False # We can provide link to older files. 
Will be # set to True if older files are found. additionals = False # We have additional files. Will be set to # True if additional files are found. # Prepare object to return parsed_urls = {'main_urls':{}, # Urls hosted by Invenio (bibdocs) 'others_urls':[] # External urls } if CFG_CERN_SITE: parsed_urls['cern_urls'] = [] # cern.ch urls # Doctypes can of any type, but when there is one file marked as # 'Main', we consider that there is a distinction between "main" # and "additional" files. Otherwise they will all be considered # equally as main files distinct_main_and_additional_files = False if len(bibarchive.list_bibdocs(doctype='Main')) > 0 and \ distinguish_main_and_additional_files: distinct_main_and_additional_files = True # Parse URLs for complete_url in urls: if complete_url.has_key('u'): url = complete_url['u'] (dummy, host, path, dummy, params, dummy) = urlparse(url) subformat = complete_url.get('x', '') filename = urllib.unquote(basename(path)) name = file_strip_ext(filename) url_format = filename[len(name):] if url_format.startswith('.'): url_format = url_format[1:] if compose_format(url_format, subformat) in _CFG_NORMALIZED_BIBFORMAT_HIDDEN_FILE_FORMATS: ## This format should be hidden. continue descr = _("Fulltext") if complete_url.has_key('y'): descr = complete_url['y'] if descr == 'Fulltext': descr = _("Fulltext") if not url.startswith(CFG_SITE_URL): # Not a bibdoc? if not descr: # For not bibdoc let's have a description # Display the URL in full: descr = url if CFG_CERN_SITE and 'cern.ch' in host and \ ('/setlink?' in url or \ 'cms' in host or \ 'documents.cern.ch' in url or \ 'doc.cern.ch' in url or \ 'preprints.cern.ch' in url): url_params_dict = dict([part.split('=') for part in params.split('&') if len(part.split('=')) == 2]) if url_params_dict.has_key('categ') and \ (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \ url_params_dict.has_key('id'): # Old arXiv links, used to be handled by # setlink. 
Provide direct links to arXiv for file_format, label in [('pdf', "PDF")]:#, #('ps', "PS"), #('e-print', "Source (generally TeX or LaTeX)"), #('abs', "Abstract")]: url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \ {'format': file_format, 'category': url_params_dict['categ'], 'id': url_params_dict['id']} parsed_urls['others_urls'].append((url, "%s/%s %s" % \ (url_params_dict['categ'], url_params_dict['id'], label))) else: parsed_urls['others_urls'].append((url, descr)) # external url else: # It's a bibdoc! assigned = False for doc in bibarchive.list_bibdocs(): if int(doc.get_latest_version()) > 1: old_versions = True if True in [f.fullname.startswith(filename) \ for f in doc.list_all_files()]: assigned = True if not include_subformat_icons and \ - CFG_WEBSUBMIT_ICON_SUBFORMAT_RE.match(subformat): + CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(subformat): # This is an icon and we want to skip it continue if not doc.doctype == 'Main' and \ distinct_main_and_additional_files == True: # In that case we record that there are # additional files, but don't add them to # returned structure. 
additionals = True else: if not descr: descr = _('Fulltext') if not parsed_urls['main_urls'].has_key(descr): parsed_urls['main_urls'][descr] = [] params = parse_qs(params) if 'subformat' in params: url_format += ' (%s)' % params['subformat'][0] parsed_urls['main_urls'][descr].append((url, name, url_format)) if not assigned: # Url is not a bibdoc :-S if not descr: descr = filename parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url return (parsed_urls, old_versions, additionals) _RE_SPLIT = re.compile(r"\d+|\D+") def sort_alphanumerically(elements): elements = [([not token.isdigit() and token or int(token) for token in _RE_SPLIT.findall(element)], element) for element in elements] elements.sort() return [element[1] for element in elements] diff --git a/modules/bibupload/lib/bibupload.py b/modules/bibupload/lib/bibupload.py index 83aeae090..55e23dd8f 100644 --- a/modules/bibupload/lib/bibupload.py +++ b/modules/bibupload/lib/bibupload.py @@ -1,2272 +1,2272 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibUpload: Receive MARC XML file and update the appropriate database tables according to options. 
""" __revision__ = "$Id$" import os import re import sys import time from datetime import datetime from zlib import compress import socket import marshal import copy import tempfile import urlparse import urllib2 import urllib from invenio.config import CFG_OAI_ID_FIELD, \ CFG_BIBUPLOAD_REFERENCE_TAG, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG, \ CFG_BIBUPLOAD_STRONG_TAGS, \ CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBUPLOAD_DELETE_FORMATS, \ CFG_SITE_URL, CFG_SITE_RECORD, \ CFG_OAI_PROVENANCE_ALTERED_SUBFIELD from invenio.jsonutils import json, CFG_JSON_AVAILABLE from invenio.bibupload_config import CFG_BIBUPLOAD_CONTROLFIELD_TAGS, \ CFG_BIBUPLOAD_SPECIAL_TAGS from invenio.dbquery import run_sql, \ Error from invenio.bibrecord import create_records, \ record_add_field, \ record_delete_field, \ record_xml_output, \ record_get_field_instances, \ record_get_field_value, \ record_get_field_values, \ field_get_subfield_values, \ field_get_subfield_instances, \ record_modify_subfield, \ record_delete_subfield_from, \ record_delete_fields, \ record_add_subfield_into, \ record_find_field, \ record_extract_oai_id from invenio.search_engine import get_record from invenio.dateutils import convert_datestruct_to_datetext from invenio.errorlib import register_exception from invenio.intbitset import intbitset from invenio.urlutils import make_user_agent_string -from invenio.config import CFG_WEBSUBMIT_FILEDIR +from invenio.config import CFG_BIBDOCFILE_FILEDIR from invenio.bibtask import task_init, write_message, \ task_set_option, task_get_option, task_get_task_param, task_update_status, \ task_update_progress, task_sleep_now_if_required, fix_argv_paths from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, \ get_docname_from_url, check_valid_url, download_url, \ - KEEP_OLD_VALUE, decompose_bibdocfile_url, InvenioWebSubmitFileError, 
\ + KEEP_OLD_VALUE, decompose_bibdocfile_url, InvenioBibDocFileError, \ bibdocfile_url_p, CFG_BIBDOCFILE_AVAILABLE_FLAGS, guess_format_from_url from invenio.search_engine import search_pattern #Statistic variables stat = {} stat['nb_records_to_upload'] = 0 stat['nb_records_updated'] = 0 stat['nb_records_inserted'] = 0 stat['nb_errors'] = 0 stat['nb_holdingpen'] = 0 stat['exectime'] = time.localtime() _WRITING_RIGHTS = None CFG_BIBUPLOAD_ALLOWED_SPECIAL_TREATMENTS = ('oracle', ) ## Let's set a reasonable timeout for URL request (e.g. FFT) socket.setdefaulttimeout(40) _re_find_001 = re.compile('\\s*(\\d*)\\s*', re.S) def bibupload_pending_recids(): """This function embed a bit of A.I. and is more a hack than an elegant algorithm. It should be updated in case bibupload/bibsched are modified in incompatible ways. This function return the intbitset of all the records that are being (or are scheduled to be) touched by other bibuploads. """ options = run_sql("""SELECT arguments FROM schTASK WHERE status<>'DONE' AND proc='bibupload' AND (status='RUNNING' OR status='CONTINUING' OR status='WAITING' OR status='SCHEDULED' OR status='ABOUT TO STOP' OR status='ABOUT TO SLEEP')""") ret = intbitset() xmls = [] if options: for arguments in options: arguments = marshal.loads(arguments[0]) for argument in arguments[1:]: if argument.startswith('/'): # XMLs files are recognizable because they're absolute # files... xmls.append(argument) for xmlfile in xmls: # Let's grep for the 001 try: xml = open(xmlfile).read() ret += [int(group[1]) for group in _re_find_001.findall(xml)] except: continue return ret ### bibupload engine functions: def bibupload(record, opt_tag=None, opt_mode=None, opt_stage_to_start_from=1, opt_notimechange=0, oai_rec_id = "", pretend=False): """Main function: process a record and fit it in the tables bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record metadata. Return (error_code, recID) of the processed record. 
""" assert(opt_mode in ('insert', 'replace', 'replace_or_insert', 'reference', 'correct', 'append', 'format', 'holdingpen', 'delete')) error = None now = datetime.now() # will hold record creation/modification date # If there are special tags to proceed check if it exists in the record if opt_tag is not None and not(record.has_key(opt_tag)): msg = " Failed: Tag not found, enter a valid tag to update." write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) # Extraction of the Record Id from 001, SYSNO or OAIID tags: rec_id = retrieve_rec_id(record, opt_mode, pretend=pretend) if rec_id == -1: msg = " Failed: either the record already exists and insert was " \ "requested or the record does not exists and " \ "replace/correct/append has been used" write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) elif rec_id > 0: write_message(" -Retrieve record ID (found %s): DONE." % rec_id, verbose=2) if not record.has_key('001'): # Found record ID by means of SYSNO or OAIID, and the # input MARCXML buffer does not have this 001 tag, so we # should add it now: error = record_add_field(record, '001', controlfield_value=rec_id) if error is None: msg = " Failed: Error during adding the 001 controlfield " \ "to the record" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: error = None write_message(" -Added tag 001: DONE.", verbose=2) write_message(" -Check if the xml marc file is already in the database: DONE" , verbose=2) # Reference mode check if there are reference tag if opt_mode == 'reference': error = extract_tag_from_record(record, CFG_BIBUPLOAD_REFERENCE_TAG) if error is None: msg = " Failed: No reference tags has been found..." 
write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) else: error = None write_message(" -Check if reference tags exist: DONE", verbose=2) record_deleted_p = False if opt_mode == 'insert' or \ (opt_mode == 'replace_or_insert') and rec_id is None: insert_mode_p = True # Insert the record into the bibrec databases to have a recordId rec_id = create_new_record(pretend=pretend) write_message(" -Creation of a new record id (%d): DONE" % rec_id, verbose=2) # we add the record Id control field to the record error = record_add_field(record, '001', controlfield_value=rec_id) if error is None: msg = " Failed: Error during adding the 001 controlfield " \ "to the record" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: error = None error = record_add_field(record, '005', controlfield_value=now.strftime("%Y%m%d%H%M%S.0")) if error is None: write_message(" Failed: Error during adding to 005 controlfield to record",verbose=1,stream=sys.stderr) return (1, int(rec_id)) else: error=None elif opt_mode != 'insert' and opt_mode != 'format' and \ opt_stage_to_start_from != 5: insert_mode_p = False # Update Mode # Retrieve the old record to update rec_old = get_record(rec_id) record_had_altered_bit = record_get_field_values(rec_old, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4], CFG_OAI_PROVENANCE_ALTERED_SUBFIELD) # Also save a copy to restore previous situation in case of errors original_record = get_record(rec_id) if original_record.has_key('005'): record_delete_field(original_record,'005') if rec_old is None: msg = " Failed during the creation of the old record!" 
write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: write_message(" -Retrieve the old record to update: DONE", verbose=2) if rec_old.has_key('005'): record_delete_field(rec_old,'005') # In Replace mode, take over old strong tags if applicable: if opt_mode == 'replace' or \ opt_mode == 'replace_or_insert': copy_strong_tags_from_old_record(record, rec_old) # Delete tags to correct in the record if opt_mode == 'correct' or opt_mode == 'reference': delete_tags_to_correct(record, rec_old, opt_tag) write_message(" -Delete the old tags to correct in the old record: DONE", verbose=2) # Delete tags specified if in delete mode if opt_mode == 'delete': record = delete_tags(record, rec_old) write_message(" -Delete specified tags in the old record: DONE", verbose=2) # Append new tag to the old record and update the new record with the old_record modified if opt_mode == 'append' or opt_mode == 'correct' or \ opt_mode == 'reference': record = append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode) write_message(" -Append new tags to the old record: DONE", verbose=2) # 005 tag should be added everytime the record is modified # If an exiting record is modified, its 005 tag should be overwritten with a new revision value if record.has_key('005'): record_delete_field(record, '005') write_message(" Deleted the existing 005 tag.", verbose=2) error = record_add_field(record, '005', controlfield_value=now.strftime("%Y%m%d%H%M%S.0")) if error is None: write_message(" Failed: Error during adding to 005 controlfield to record",verbose=1,stream=sys.stderr) return (1, int(rec_id)) else: error=None write_message(" -Added tag 005: DONE. "+ str(record_get_field_value(record,'005','','')), verbose=2) # if record_had_altered_bit, this must be set to true, since the # record has been altered. 
if record_had_altered_bit: oai_provenance_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) for oai_provenance_field in oai_provenance_fields: for i, (code, dummy_value) in enumerate(oai_provenance_field[0]): if code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD: oai_provenance_field[0][i] = (code, 'true') # now we clear all the rows from bibrec_bibxxx from the old # record (they will be populated later (if needed) during # stage 4 below): delete_bibrec_bibxxx(rec_old, rec_id, pretend=pretend) record_deleted_p = True write_message(" -Clean bibrec_bibxxx: DONE", verbose=2) write_message(" -Stage COMPLETED", verbose=2) try: if not record_is_valid(record): msg = "ERROR: record is not valid" write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) # Have a look if we have FMT tags we_have_fmt_tags_p = extract_tag_from_record(record, 'FMT') is not None write_message("Stage 1: Start (Insert of FMT tags if exist).", verbose=2) if opt_stage_to_start_from <= 1 and we_have_fmt_tags_p: record = insert_fmt_tags(record, rec_id, opt_mode, pretend=pretend) if record is None: msg = " Stage 1 failed: Error while inserting FMT tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) elif record == 0: # Mode format finished stat['nb_records_updated'] += 1 return (0, int(rec_id), "") write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Have a look if we have FFT tags write_message("Stage 2: Start (Process FFT tags if exist).", verbose=2) record_had_FFT = False if opt_stage_to_start_from <= 2 and \ extract_tag_from_record(record, 'FFT') is not None: record_had_FFT = True if not writing_rights_p(): write_message(" Stage 2 failed: Error no rights to write fulltext files", verbose=1, stream=sys.stderr) task_update_status("ERROR") sys.exit(1) try: record = elaborate_fft_tags(record, rec_id, opt_mode, 
pretend=pretend) except Exception, e: register_exception() msg = " Stage 2 failed: Error while elaborating FFT tags: %s" % e write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if record is None: msg = " Stage 2 failed: Error while elaborating FFT tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Have a look if we have FFT tags write_message("Stage 2B: Start (Synchronize 8564 tags).", verbose=2) has_bibdocs = run_sql("SELECT count(id_bibdoc) FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'", (rec_id, ))[0][0] > 0 if opt_stage_to_start_from <= 2 and (has_bibdocs or record_had_FFT or extract_tag_from_record(record, '856') is not None): try: record = synchronize_8564(rec_id, record, record_had_FFT, pretend=pretend) except Exception, e: register_exception(alert_admin=True) msg = " Stage 2B failed: Error while synchronizing 8564 tags: %s" % e write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if record is None: msg = " Stage 2B failed: Error while synchronizing 8564 tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Update of the BibFmt write_message("Stage 3: Start (Update bibfmt).", verbose=2) if opt_stage_to_start_from <= 3: # format the single record as xml rec_xml_new = record_xml_output(record) # Update bibfmt with the format xm of this record if opt_mode != 'format': modification_date = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(record_get_field_value(record,'005'),'%Y%m%d%H%M%S.0')) error = update_bibfmt_format(rec_id, rec_xml_new, 'xm', modification_date, pretend=pretend) if error == 1: msg = " Failed: error during update_bibfmt_format 'xm'" write_message(msg, verbose=1, stream=sys.stderr) return (1, 
int(rec_id), msg) if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: error = update_bibfmt_format(rec_id, marshal.dumps(record), 'recstruct', modification_date, pretend=pretend) if error == 1: msg = " Failed: error during update_bibfmt_format 'recstruct'" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if not we_have_fmt_tags_p: # delete some formats like HB upon record change: for format_to_delete in CFG_BIBUPLOAD_DELETE_FORMATS: try: delete_bibfmt_format(rec_id, format_to_delete, pretend=pretend) except: # OK, some formats like HB could not have been deleted, no big deal pass # archive MARCXML format of this record for version history purposes: error = archive_marcxml_for_history(rec_id, pretend=pretend) if error == 1: msg = " Failed to archive MARCXML for history" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: write_message(" -Archived MARCXML for history : DONE", verbose=2) write_message(" -Stage COMPLETED", verbose=2) # Update the database MetaData write_message("Stage 4: Start (Update the database with the metadata).", verbose=2) if opt_stage_to_start_from <= 4: if opt_mode in ('insert', 'replace', 'replace_or_insert', 'append', 'correct', 'reference', 'delete'): update_database_with_metadata(record, rec_id, oai_rec_id, pretend=pretend) record_deleted_p = False else: write_message(" -Stage NOT NEEDED in mode %s" % opt_mode, verbose=2) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Finally we update the bibrec table with the current date write_message("Stage 5: Start (Update bibrec table with current date).", verbose=2) if opt_stage_to_start_from <= 5 and \ opt_notimechange == 0 and \ not insert_mode_p: write_message(" -Retrieved current localtime: DONE", verbose=2) update_bibrec_modif_date(now.strftime("%Y-%m-%d %H:%M:%S"), rec_id, pretend=pretend) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) 
# Increase statistics if insert_mode_p: stat['nb_records_inserted'] += 1 else: stat['nb_records_updated'] += 1 # Upload of this record finish write_message("Record "+str(rec_id)+" DONE", verbose=1) return (0, int(rec_id), "") finally: if record_deleted_p: ## BibUpload has failed living the record deleted. We should ## back the original record then. update_database_with_metadata(original_record, rec_id, oai_rec_id, pretend=pretend) write_message(" Restored original record", verbose=1, stream=sys.stderr) def record_is_valid(record): """ Check if the record is valid. Currently this simply checks if the record has exactly one rec_id. @param record: the record @type record: recstruct @return: True if the record is valid @rtype: bool """ rec_ids = record_get_field_values(record, tag="001") if len(rec_ids) != 1: write_message(" The record is not valid: it has not a single rec_id: %s" % (rec_ids), stream=sys.stderr) return False return True def find_record_ids_by_oai_id(oaiId): """ A method finding the records identifier provided the oai identifier returns a list of identifiers matching a given oai identifier """ # Is this record already in invenio (matching by oaiid) if oaiId: recids = search_pattern(p=oaiId, f=CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, m='e') # Is this record already in invenio (matching by reportnumber i.e. # particularly 037. Idea: to avoid doubbles insertions) repnumber = oaiId.split(":")[-1] if repnumber: recids |= search_pattern(p = repnumber, f = "reportnumber", m = 'e' ) # Is this record already in invenio (matching by reportnumber i.e. # particularly 037. 
Idea: to avoid double insertions) repnumber = "arXiv:" + oaiId.split(":")[-1] recids |= search_pattern(p = repnumber, f = "reportnumber", m = 'e' ) return recids else: return intbitset() def insert_record_into_holding_pen(record, oai_id, pretend=False): query = "INSERT INTO bibHOLDINGPEN (oai_id, changeset_date, changeset_xml, id_bibrec) VALUES (%s, NOW(), %s, %s)" xml_record = record_xml_output(record) bibrec_ids = find_record_ids_by_oai_id(oai_id) # here determining the identifier of the record if len(bibrec_ids) > 0: bibrec_id = bibrec_ids.pop() else: # id not found by using the oai_id, let's use a wider search based # on any information we might have. bibrec_id = retrieve_rec_id(record, 'holdingpen', pretend=pretend) if bibrec_id is None: bibrec_id = 0 if not pretend: run_sql(query, (oai_id, xml_record, bibrec_id)) # record_id is logged as 0! ( We are not inserting into the main database) log_record_uploading(oai_id, task_get_task_param('task_id', 0), 0, 'H', pretend=pretend) stat['nb_holdingpen'] += 1 def print_out_bibupload_statistics(): """Print the statistics of the process""" out = "Task stats: %(nb_input)d input records, %(nb_updated)d updated, " \ "%(nb_inserted)d inserted, %(nb_errors)d errors, %(nb_holdingpen)d inserted to holding pen. " \ "Time %(nb_sec).2f sec." 
% { \ 'nb_input': stat['nb_records_to_upload'], 'nb_updated': stat['nb_records_updated'], 'nb_inserted': stat['nb_records_inserted'], 'nb_errors': stat['nb_errors'], 'nb_holdingpen': stat['nb_holdingpen'], 'nb_sec': time.time() - time.mktime(stat['exectime']) } write_message(out) def open_marc_file(path): """Open a file and return the data""" try: # open the file containing the marc document marc_file = open(path,'r') marc = marc_file.read() marc_file.close() except IOError, erro: write_message("Error: %s" % erro, verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) if erro.errno == 2: # No such file or directory # Not scary task_update_status("CERROR") else: task_update_status("ERROR") sys.exit(1) return marc def xml_marc_to_records(xml_marc): """create the records""" # Creation of the records from the xml Marc in argument recs = create_records(xml_marc, 1, 1) if recs == []: write_message("Error: Cannot parse MARCXML file.", verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") sys.exit(1) elif recs[0][0] is None: write_message("Error: MARCXML file has wrong format: %s" % recs, verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("CERROR") sys.exit(1) else: recs = map((lambda x:x[0]), recs) return recs def find_record_format(rec_id, format): """Look whether record REC_ID is formatted in FORMAT, i.e. whether FORMAT exists in the bibfmt table for this record. Return the number of times it is formatted: 0 if not, 1 if yes, 2 if found more than once (should never occur). """ out = 0 query = """SELECT COUNT(*) FROM bibfmt WHERE id_bibrec=%s AND format=%s""" params = (rec_id, format) res = [] try: res = run_sql(query, params) out = res[0][0] except Error, error: write_message(" Error during find_record_format() : %s " % error, verbose=1, stream=sys.stderr) return out def find_record_from_recid(rec_id): """ Try to find record in the database from the REC_ID number. 
Return record ID if found, None otherwise. """ try: res = run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id,)) except Error, error: write_message(" Error during find_record_bibrec() : %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def find_record_from_sysno(sysno): """ Try to find record in the database from the external SYSNO number. Return record ID if found, None otherwise. """ bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, sysno,)) except Error, error: write_message(" Error during find_record_from_sysno(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def find_records_from_extoaiid(extoaiid, extoaisrc=None): """ Try to find records in the database from the external EXTOAIID number. Return list of record ID if found, None otherwise. 
""" assert(CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5] == CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[:5]) bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: write_message(' Looking for extoaiid="%s" with extoaisrc="%s"' % (extoaiid, extoaisrc), verbose=9) id_bibrecs = intbitset(run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, extoaiid,))) write_message(' Partially found %s for extoaiid="%s"' % (id_bibrecs, extoaiid), verbose=9) ret = intbitset() for id_bibrec in id_bibrecs: record = get_record(id_bibrec) instances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) write_message(' recid %s -> instances "%s"' % (id_bibrec, instances), verbose=9) for instance in instances: this_extoaisrc = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5]) this_extoaisrc = this_extoaisrc and this_extoaisrc[0] or None this_extoaiid = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]) this_extoaiid = this_extoaiid and this_extoaiid[0] or None write_message(" this_extoaisrc -> %s, this_extoaiid -> %s" % (this_extoaisrc, this_extoaiid), verbose=9) if this_extoaiid == extoaiid: write_message(' recid %s -> provenance "%s"' % (id_bibrec, this_extoaisrc), verbose=9) if this_extoaisrc == extoaisrc: write_message('Found recid %s for extoaiid="%s" with provenance="%s"' % (id_bibrec, extoaiid, extoaisrc), verbose=9) ret.add(id_bibrec) break if this_extoaisrc is None: write_message('WARNING: Found recid %s for extoaiid="%s" that doesn\'t specify any provenance, while input record does.' 
% (id_bibrec, extoaiid), stream=sys.stderr) if extoaisrc is None: write_message('WARNING: Found recid %s for extoaiid="%s" that specify a provenance (%s), while input record does not have a provenance.' % (id_bibrec, extoaiid, this_extoaisrc), stream=sys.stderr) return ret except Error, error: write_message(" Error during find_records_from_extoaiid(): %s " % error, verbose=1, stream=sys.stderr) raise def find_record_from_oaiid(oaiid): """ Try to find record in the database from the OAI ID number and OAI SRC. Return record ID if found, None otherwise. """ bibxxx = 'bib'+CFG_OAI_ID_FIELD[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_OAI_ID_FIELD, oaiid,)) except Error, error: write_message(" Error during find_record_from_oaiid(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def extract_tag_from_record(record, tag_number): """ Extract the tag_number for record.""" # first step verify if the record is not already in the database if record: return record.get(tag_number, None) return None def retrieve_rec_id(record, opt_mode, pretend=False): """Retrieve the record Id from a record by using tag 001 or SYSNO or OAI ID tag. opt_mod is the desired mode.""" rec_id = None # 1st step: we look for the tag 001 tag_001 = extract_tag_from_record(record, '001') if tag_001 is not None: # We extract the record ID from the tag rec_id = tag_001[0][3] # if we are in insert mode => error if opt_mode == 'insert': write_message(" Failed: tag 001 found in the xml" \ " submitted, you should use the option replace," \ " correct or append to replace an existing" \ " record. 
(-h for help)", verbose=1, stream=sys.stderr) return -1 else: # we found the rec id and we are not in insert mode => continue # we try to match rec_id against the database: if find_record_from_recid(rec_id) is not None: # okay, 001 corresponds to some known record return int(rec_id) elif opt_mode in ('replace', 'replace_or_insert'): if task_get_option('force'): # we found the rec_id but it's not in the system and we are # requested to replace records. Therefore we create on the fly # a empty record allocating the recid. write_message(" Warning: tag 001 found in the xml with" " value %(rec_id)s, but rec_id %(rec_id)s does" " not exist. Since the mode replace was" " requested the rec_id %(rec_id)s is allocated" " on-the-fly." % {"rec_id" : rec_id}, stream=sys.stderr) return create_new_record(rec_id=rec_id, pretend=pretend) else: # Since --force was not used we are going to raise an error write_message(" Failed: tag 001 found in the xml" " submitted with value %(rec_id)s. The" " corresponding record however does not" " exists. If you want to really create" " such record, please use the --force" " parameter when calling bibupload." % { "rec_id": rec_id}, stream=sys.stderr) return -1 else: # The record doesn't exist yet. We shall have try to check # the SYSNO or OAI id later. 
write_message(" -Tag 001 value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag 001 not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the SYSNO sysnos = record_get_field_values(record, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or "", CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6]) if sysnos: sysno = sysnos[0] # there should be only one external SYSNO write_message(" -Checking if SYSNO " + sysno + \ " exists in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_sysno(sysno) if rec_id is not None: # rec_id found pass else: # The record doesn't exist yet. We will try to check # external and internal OAI ids later. write_message(" -Tag SYSNO value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the external OAIID extoai_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or "") if extoai_fields: for field in extoai_fields: extoaiid = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6]) extoaisrc = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6]) if extoaiid: extoaiid = extoaiid[0] if extoaisrc: extoaisrc = extoaisrc[0] else: extoaisrc = None write_message(" -Checking if EXTOAIID %s (%s) exists in the database" % (extoaiid, extoaisrc), verbose=9) # try to find the corresponding rec id from the database try: rec_ids = find_records_from_extoaiid(extoaiid, extoaisrc) except Error, e: 
write_message(e, verbose=1, stream=sys.stderr) return -1 if rec_ids: # rec_id found rec_id = rec_ids.pop() break else: # The record doesn't exist yet. We will try to check # OAI id later. write_message(" -Tag EXTOAIID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag EXTOAIID not found in the xml marc file.", verbose=9) if rec_id is None: # 4th step we look for the OAI ID oaiidvalues = record_get_field_values(record, CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or "", CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or "", CFG_OAI_ID_FIELD[5:6]) if oaiidvalues: oaiid = oaiidvalues[0] # there should be only one OAI ID write_message(" -Check if local OAI ID " + oaiid + \ " exist in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_oaiid(oaiid) if rec_id is not None: # rec_id found pass else: write_message(" -Tag OAI ID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) # Now we should have detected rec_id from SYSNO or OAIID # tags. (None otherwise.) if rec_id: if opt_mode == 'insert': write_message(" Failed : Record found in the database," \ " you should use the option replace," \ " correct or append to replace an existing" \ " record. (-h for help)", verbose=1, stream=sys.stderr) return -1 else: if opt_mode != 'insert' and \ opt_mode != 'replace_or_insert': write_message(" Failed : Record not found in the database."\ " Please insert the file before updating it."\ " (-h for help)", verbose=1, stream=sys.stderr) return -1 return rec_id and int(rec_id) or None ### Insert functions def create_new_record(rec_id=None, pretend=False): """ Create new record in the database @param rec_id: if specified the new record will have this rec_id. 
@type rec_id: int @return: the allocated rec_id @rtype: int @note: in case of errors will be returned None """ if rec_id is not None: try: rec_id = int(rec_id) except (ValueError, TypeError), error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None if run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id, )): write_message(" Error during the creation_new_record function : the requested rec_id %s already exists." % rec_id) return None if pretend: if rec_id: return rec_id else: return run_sql("SELECT max(id)+1 FROM bibrec")[0][0] try: if rec_id is not None: return run_sql("INSERT INTO bibrec (id, creation_date, modification_date) VALUES (%s, NOW(), NOW())", (rec_id, )) else: return run_sql("INSERT INTO bibrec (creation_date, modification_date) VALUES (NOW(), NOW())") except Error, error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_bibfmt(id_bibrec, marc, format, modification_date='1970-01-01 00:00:00', pretend=False): """Insert the format in the table bibfmt""" # compress the marc value pickled_marc = compress(marc) try: time.strptime(modification_date, "%Y-%m-%d %H:%M:%S") except ValueError: modification_date = '1970-01-01 00:00:00' query = """INSERT LOW_PRIORITY INTO bibfmt (id_bibrec, format, last_updated, value) VALUES (%s, %s, %s, %s)""" try: if not pretend: row_id = run_sql(query, (id_bibrec, format, modification_date, pickled_marc)) return row_id else: return 1 except Error, error: write_message(" Error during the insert_bibfmt function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_record_bibxxx(tag, value, pretend=False): """Insert the record into bibxxx""" # determine into which table one should insert the record table_name = 'bib'+tag[0:2]+'x' # check if the tag, value combination exists in the table query = """SELECT id,value FROM %s """ % table_name query += """ WHERE tag=%s AND 
value=%s""" params = (tag, value) try: res = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) # Note: compare now the found values one by one and look for # string binary equality (e.g. to respect lowercase/uppercase # match), regardless of the charset etc settings. Ideally we # could use a BINARY operator in the above SELECT statement, but # we would have to check compatibility on various MySQLdb versions # etc; this approach checks all matched values in Python, not in # MySQL, which is less cool, but more conservative, so it should # work better on most setups. for row in res: row_id = row[0] row_value = row[1] if row_value == value: return (table_name, row_id) # We got here only when the tag,value combination was not found, # so it is now necessary to insert the tag,value combination into # bibxxx table as new. query = """INSERT INTO %s """ % table_name query += """ (tag, value) values (%s , %s)""" params = (tag, value) try: if not pretend: row_id = run_sql(query, params) else: return (table_name, 1) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) return (table_name, row_id) def insert_record_bibrec_bibxxx(table_name, id_bibxxx, field_number, id_bibrec, pretend=False): """Insert the record into bibrec_bibxxx""" # determine into which table one should insert the record full_table_name = 'bibrec_'+ table_name # insert the proper row into the table query = """INSERT INTO %s """ % full_table_name query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)""" params = (id_bibrec, id_bibxxx, field_number) try: if not pretend: res = run_sql(query, params) else: return 1 except Error, error: write_message(" Error during the insert_record_bibrec_bibxxx" " function 2nd query : %s " % error, verbose=1, stream=sys.stderr) return res def synchronize_8564(rec_id, record, 
record_had_FFT, pretend=False): """ Synchronize 8564_ tags and BibDocFile tables. This function directly manipulate the record parameter. @type rec_id: positive integer @param rec_id: the record identifier. @param record: the record structure as created by bibrecord.create_record @type record_had_FFT: boolean @param record_had_FFT: True if the incoming bibuploaded-record used FFT @return: the manipulated record (which is also modified as a side effect) """ def merge_marc_into_bibdocfile(field, pretend=False): """ Internal function that reads a single field and store its content in BibDocFile tables. @param field: the 8564_ field containing a BibDocFile URL. """ write_message('Merging field: %s' % (field, ), verbose=9) url = field_get_subfield_values(field, 'u')[:1] or field_get_subfield_values(field, 'q')[:1] description = field_get_subfield_values(field, 'y')[:1] comment = field_get_subfield_values(field, 'z')[:1] if url: recid, docname, format = decompose_bibdocfile_url(url[0]) if recid != rec_id: write_message("INFO: URL %s is not pointing to a fulltext owned by this record (%s)" % (url, recid), stream=sys.stderr) else: try: bibdoc = BibRecDocs(recid).get_bibdoc(docname) if description and not pretend: bibdoc.set_description(description[0], format) if comment and not pretend: bibdoc.set_comment(comment[0], format) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: ## Apparently the referenced docname doesn't exist anymore. ## Too bad. Let's skip it. write_message("WARNING: docname %s does not seem to exist for record %s. Has it been renamed outside FFT?" % (docname, recid), stream=sys.stderr) def merge_bibdocfile_into_marc(field, subfields): """ Internal function that reads BibDocFile table entries referenced by the URL in the given 8564_ field and integrate the given information directly with the provided subfields. @param field: the 8564_ field containing a BibDocFile URL. 
@param subfields: the subfields corresponding to the BibDocFile URL generated after BibDocFile tables. """ write_message('Merging subfields %s into field %s' % (subfields, field), verbose=9) subfields = dict(subfields) ## We make a copy not to have side-effects subfield_to_delete = [] for subfield_position, (code, value) in enumerate(field_get_subfield_instances(field)): ## For each subfield instance already existing... if code in subfields: ## ...We substitute it with what is in BibDocFile tables record_modify_subfield(record, '856', code, subfields[code], subfield_position, field_position_global=field[4]) del subfields[code] else: ## ...We delete it otherwise subfield_to_delete.append(subfield_position) subfield_to_delete.sort() for counter, position in enumerate(subfield_to_delete): ## FIXME: Very hackish algorithm. Since deleting a subfield ## will alterate the position of following subfields, we ## are taking note of this and adjusting further position ## by using a counter. record_delete_subfield_from(record, '856', position - counter, field_position_global=field[4]) subfields = subfields.items() subfields.sort() for code, value in subfields: ## Let's add non-previously existing subfields record_add_subfield_into(record, '856', code, value, field_position_global=field[4]) def get_bibdocfile_managed_info(): """ Internal function to eturns a dictionary of BibDocFile URL -> wanna-be subfields. 
@rtype: mapping @return: BibDocFile URL -> wanna-be subfields dictionary """ ret = {} bibrecdocs = BibRecDocs(rec_id) latest_files = bibrecdocs.list_latest_files(list_hidden=False) for afile in latest_files: url = afile.get_url() ret[url] = {'u' : url} description = afile.get_description() comment = afile.get_comment() subformat = afile.get_subformat() if description: ret[url]['y'] = description if comment: ret[url]['z'] = comment if subformat: ret[url]['x'] = subformat return ret write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record), verbose=9) tags856s = record_get_field_instances(record, '856', '%', '%') write_message("Original 856%% instances: %s" % tags856s, verbose=9) tags8564s_to_add = get_bibdocfile_managed_info() write_message("BibDocFile instances: %s" % tags8564s_to_add, verbose=9) positions_tags8564s_to_remove = [] for local_position, field in enumerate(tags856s): if field[1] == '4' and field[2] == ' ': write_message('Analysing %s' % (field, ), verbose=9) for url in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'): if url in tags8564s_to_add: if record_had_FFT: merge_bibdocfile_into_marc(field, tags8564s_to_add[url]) else: merge_marc_into_bibdocfile(field, pretend=pretend) del tags8564s_to_add[url] break elif bibdocfile_url_p(url) and decompose_bibdocfile_url(url)[0] == rec_id: positions_tags8564s_to_remove.append(local_position) write_message("%s to be deleted and re-synchronized" % (field, ), verbose=9) break record_delete_fields(record, '856', positions_tags8564s_to_remove) tags8564s_to_add = tags8564s_to_add.values() tags8564s_to_add.sort() for subfields in tags8564s_to_add: subfields = subfields.items() subfields.sort() record_add_field(record, '856', '4', ' ', subfields=subfields) write_message('Final record: %s' % record, verbose=9) return record def elaborate_fft_tags(record, rec_id, mode, pretend=False): """ Process FFT tags that should contain $a with file pathes or URLs to get the fulltext 
from. This function enriches record with proper 8564 URL tags, downloads fulltext files and stores them into var/data structure where appropriate. CFG_BIBUPLOAD_WGET_SLEEP_TIME defines time to sleep in seconds in between URL downloads. Note: if an FFT tag contains multiple $a subfields, we upload them into different 856 URL tags in the metadata. See regression test case test_multiple_fft_insert_via_http(). """ # Let's define some handy sub procedure. def _add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, modification_date, pretend=False): """Adds a new format for a given bibdoc. Returns True when everything's fine.""" write_message('Add new format to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s, modification_date: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment, flags, modification_date), verbose=9) try: if not url: # Not requesting a new url. Just updating comment & description return _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=pretend) try: if not pretend: bibdoc.add_file_new_format(url, description=description, comment=comment, flags=flags, modification_date=modification_date) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because format already exists (%s)." % (url, format, docname, doctype, newname, description, comment, flags, modification_date, e), stream=sys.stderr) raise except Exception, e: write_message("Error in adding '%s' as a new format because of: %s" % (url, e), stream=sys.stderr) raise return True def _add_new_version(bibdoc, url, format, docname, doctype, newname, description, comment, flags, modification_date, pretend=False): """Adds a new version for a given bibdoc. 
Returns True when everything's fine.""" write_message('Add new version to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment, flags)) try: if not url: return _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=pretend) try: if not pretend: bibdoc.add_file_new_version(url, description=description, comment=comment, flags=flags, modification_date=modification_date) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because '%s'." % (url, format, docname, doctype, newname, description, comment, flags, modification_date, e), stream=sys.stderr) raise except Exception, e: write_message("Error in adding '%s' as a new version because of: %s" % (url, e), stream=sys.stderr) raise return True def _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=False): """Directly update comments and descriptions.""" write_message('Just updating description and comment for %s with format %s with description %s, comment %s and flags %s' % (docname, format, description, comment, flags), verbose=9) try: if not pretend: bibdoc.set_description(description, format) bibdoc.set_comment(comment, format) for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS: if flag in flags: bibdoc.set_flag(flag, format) else: bibdoc.unset_flag(flag, format) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s') description and comment not updated because '%s'." 
% (docname, format, description, comment, flags, e)) raise return True if mode == 'delete': raise StandardError('FFT tag specified but bibupload executed in --delete mode') tuple_list = extract_tag_from_record(record, 'FFT') if tuple_list: # FFT Tags analysis write_message("FFTs: "+str(tuple_list), verbose=9) docs = {} # docnames and their data for fft in record_get_field_instances(record, 'FFT', ' ', ' '): # Let's discover the type of the document # This is a legacy field and will not be enforced any particular # check on it. doctype = field_get_subfield_values(fft, 't') if doctype: doctype = doctype[0] else: # Default is Main doctype = 'Main' # Let's discover the url. url = field_get_subfield_values(fft, 'a') if url: url = url[0] try: check_valid_url(url) except StandardError, e: raise StandardError, "fft '%s' specifies in $a a location ('%s') with problems: %s" % (fft, url, e) else: url = '' # Let's discover the description description = field_get_subfield_values(fft, 'd') if description != []: description = description[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## If the user require to correct, and do not specify ## a description this means she really want to ## modify the description. 
description = '' else: description = KEEP_OLD_VALUE # Let's discover the desired docname to be created/altered name = field_get_subfield_values(fft, 'n') if name: ## Let's remove undesired extensions name = file_strip_ext(name[0] + '.pdf') else: if url: name = get_docname_from_url(url) elif mode != 'correct' and doctype != 'FIX-MARC': raise StandardError, "Warning: fft '%s' doesn't specifies either a location in $a or a docname in $n" % str(fft) else: continue # Let's discover the desired new docname in case we want to change it newname = field_get_subfield_values(fft, 'm') if newname: newname = file_strip_ext(newname[0] + '.pdf') else: newname = name # Let's discover the desired format format = field_get_subfield_values(fft, 'f') if format: format = normalize_format(format[0]) else: if url: format = guess_format_from_url(url) else: format = "" # Let's discover the icon icon = field_get_subfield_values(fft, 'x') if icon != []: icon = icon[0] if icon != KEEP_OLD_VALUE: try: check_valid_url(icon) except StandardError, e: raise StandardError, "fft '%s' specifies in $x an icon ('%s') with problems: %s" % (fft, icon, e) else: icon = '' # Let's discover the comment comment = field_get_subfield_values(fft, 'z') if comment != []: comment = comment[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## See comment on description comment = '' else: comment = KEEP_OLD_VALUE # Let's discover the restriction restriction = field_get_subfield_values(fft, 'r') if restriction != []: restriction = restriction[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## See comment on description restriction = '' else: restriction = KEEP_OLD_VALUE version = field_get_subfield_values(fft, 'v') if version: version = version[0] else: version = '' # Let's discover the timestamp of the file (if any) timestamp = field_get_subfield_values(fft, 's') if timestamp: try: timestamp = datetime(*(time.strptime(timestamp[0], "%Y-%m-%d %H:%M:%S")[:6])) except ValueError: write_message('Warning: 
The timestamp is not in a good format, thus will be ignored. The format should be YYYY-MM-DD HH:MM:SS') timestamp = '' else: timestamp = '' flags = field_get_subfield_values(fft, 'o') for flag in flags: if flag not in CFG_BIBDOCFILE_AVAILABLE_FLAGS: raise StandardError, "fft '%s' specifies a non available flag: %s" % (fft, flag) if docs.has_key(name): # new format considered (doctype2, newname2, restriction2, version2, urls) = docs[name] if doctype2 != doctype: raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name) if newname2 != newname: raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name) if restriction2 != restriction: raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name) if version2 != version: raise StandardError, "fft '%x' specifies a different version than the previous fft with docname '%s'" % (str(fft), name) for (url2, format2, description2, comment2, flags2, timestamp2) in urls: if format == format2: raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, format, name) if url or format: urls.append((url, format, description, comment, flags, timestamp)) if icon: urls.append((icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags, timestamp)) else: if url or format: docs[name] = (doctype, newname, restriction, version, [(url, format, description, comment, flags, timestamp)]) if icon: docs[name][4].append((icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags, timestamp)) elif icon: docs[name] = (doctype, newname, restriction, version, [(icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags, timestamp)]) else: docs[name] = (doctype, newname, restriction, version, []) write_message('Result of FFT analysis:\n\tDocs: 
%s' % (docs,), verbose=9) # Let's remove all FFT tags record_delete_field(record, 'FFT', ' ', ' ') # Preprocessed data elaboration bibrecdocs = BibRecDocs(rec_id) ## Let's pre-download all the URLs to see if, in case of mode 'correct' or 'append' ## we can avoid creating a new revision. for docname, (doctype, newname, restriction, version, urls) in docs.items(): downloaded_urls = [] try: bibdoc = bibrecdocs.get_bibdoc(docname) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: ## A bibdoc with the given docname does not exists. ## So there is no chance we are going to revise an existing ## format with an identical file :-) bibdoc = None new_revision_needed = False for url, format, description, comment, flags, timestamp in urls: if url: try: downloaded_url = download_url(url, format) write_message("%s saved into %s" % (url, downloaded_url), verbose=9) except Exception, err: write_message("Error in downloading '%s' because of: %s" % (url, err), stream=sys.stderr) raise if mode == 'correct' and bibdoc is not None and not new_revision_needed: downloaded_urls.append((downloaded_url, format, description, comment, flags, timestamp)) if not bibdoc.check_file_exists(downloaded_url, format): new_revision_needed = True else: write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr) elif mode == 'append' and bibdoc is not None: if not bibdoc.check_file_exists(downloaded_url, format): downloaded_urls.append((downloaded_url, format, description, comment, flags, timestamp)) else: write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr) else: downloaded_urls.append((downloaded_url, format, description, comment, flags, timestamp)) else: downloaded_urls.append(('', format, description, comment, flags, timestamp)) if mode == 'correct' and bibdoc is not None and not new_revision_needed: ## Since we don't need a new revision (because all the files 
## that are being uploaded are different) ## we can simply remove the urls but keep the other information write_message("No need to add a new revision for docname %s for recid %s" % (docname, rec_id), verbose=2) docs[docname] = (doctype, newname, restriction, version, [('', format, description, comment, flags, timestamp) for (dummy, format, description, comment, flags, timestamp) in downloaded_urls]) for downloaded_url, dummy, dummy, dummy, dummy, dummy in downloaded_urls: ## Let's free up some space :-) if downloaded_url and os.path.exists(downloaded_url): os.remove(downloaded_url) else: if downloaded_urls or mode != 'append': docs[docname] = (doctype, newname, restriction, version, downloaded_urls) else: ## In case we are in append mode and there are no urls to append ## we discard the whole FFT del docs[docname] if mode == 'replace': # First we erase previous bibdocs if not pretend: for bibdoc in bibrecdocs.list_bibdocs(): bibdoc.delete() bibrecdocs.build_bibdoc_list() for docname, (doctype, newname, restriction, version, urls) in docs.iteritems(): write_message("Elaborating olddocname: '%s', newdocname: '%s', doctype: '%s', restriction: '%s', urls: '%s', mode: '%s'" % (docname, newname, doctype, restriction, urls, mode), verbose=9) if mode in ('insert', 'replace'): # new bibdocs, new docnames, new marc if newname in bibrecdocs.get_bibdoc_names(): write_message("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr) raise StandardError try: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) else: bibdoc = None except Exception, e: write_message("('%s', '%s', '%s') not inserted because: '%s'." 
% (doctype, newname, urls, e), stream=sys.stderr) raise StandardError for (url, format, description, comment, flags, timestamp) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend)) elif mode == 'replace_or_insert': # to be thought as correct_or_insert for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'): if newname != docname: try: if not pretend: bibdoc.change_name(newname) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() except StandardError, e: write_message(e, stream=sys.stderr) raise found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == newname: found_bibdoc = True if doctype == 'PURGE': if not pretend: bibdoc.purge() elif doctype == 'DELETE': if not pretend: bibdoc.delete() elif doctype == 'EXPUNGE': if not pretend: bibdoc.expunge() elif doctype == 'FIX-ALL': if not pretend: bibrecdocs.fix(docname) elif doctype == 'FIX-MARC': pass elif doctype == 'DELETE-FILE': if urls: for (url, format, description, comment, flags, timestamp) in urls: if not pretend: bibdoc.delete_file(format, version) elif doctype == 'REVERT': try: if not pretend: bibdoc.revert(version) except Exception, e: write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr) raise else: if restriction != KEEP_OLD_VALUE: if not pretend: bibdoc.set_status(restriction) # Since the docname already existed we have to first # bump the version by pushing the first new file # then pushing the other files. 
if urls: (first_url, first_format, first_description, first_comment, first_flags, first_timestamp) = urls[0] other_urls = urls[1:] assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, first_timestamp, pretend=pretend)) for (url, format, description, comment, flags, timestamp) in other_urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend)) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() if not found_bibdoc: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) for (url, format, description, comment, flags, timestamp) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp)) elif mode == 'correct': for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'): if newname != docname: try: if not pretend: bibdoc.change_name(newname) ## Let's refresh the list of bibdocs. 
bibrecdocs.build_bibdoc_list() except StandardError, e: write_message('Error in renaming %s to %s: %s' % (docname, newname, e), stream=sys.stderr) raise found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == newname: found_bibdoc = True if doctype == 'PURGE': if not pretend: bibdoc.purge() elif doctype == 'DELETE': if not pretend: bibdoc.delete() elif doctype == 'EXPUNGE': if not pretend: bibdoc.expunge() elif doctype == 'FIX-ALL': if not pretend: bibrecdocs.fix(newname) elif doctype == 'FIX-MARC': pass elif doctype == 'DELETE-FILE': if urls: for (url, format, description, comment, flags, timestamp) in urls: if not pretend: bibdoc.delete_file(format, version) elif doctype == 'REVERT': try: if not pretend: bibdoc.revert(version) except Exception, e: write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr) raise else: if restriction != KEEP_OLD_VALUE: if not pretend: bibdoc.set_status(restriction) if urls: (first_url, first_format, first_description, first_comment, first_flags, first_timestamp) = urls[0] other_urls = urls[1:] assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, first_timestamp, pretend=pretend)) for (url, format, description, comment, flags, timestamp) in other_urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend)) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() if not found_bibdoc: if doctype in ('PURGE', 'DELETE', 'EXPUNGE', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE', 'REVERT'): write_message("('%s', '%s', '%s') not performed because '%s' docname didn't existed." 
% (doctype, newname, urls, docname), stream=sys.stderr) raise StandardError else: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) for (url, format, description, comment, flags, timestamp) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp)) elif mode == 'append': try: found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: found_bibdoc = True for (url, format, description, comment, flags, timestamp) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend)) if not found_bibdoc: try: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, docname) bibdoc.set_status(restriction) for (url, format, description, comment, flags, timestamp) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp)) except Exception, e: register_exception() write_message("('%s', '%s', '%s') not appended because: '%s'." 
% (doctype, newname, urls, e), stream=sys.stderr) raise except: register_exception() raise return record def insert_fmt_tags(record, rec_id, opt_mode, pretend=False): """Process and insert FMT tags""" fmt_fields = record_get_field_instances(record, 'FMT') if fmt_fields: for fmt_field in fmt_fields: # Get the d, f, g subfields of the FMT tag try: d_value = field_get_subfield_values(fmt_field, "d")[0] except IndexError: d_value = "" try: f_value = field_get_subfield_values(fmt_field, "f")[0] except IndexError: f_value = "" try: g_value = field_get_subfield_values(fmt_field, "g")[0] except IndexError: g_value = "" # Update the format if not pretend: res = update_bibfmt_format(rec_id, g_value, f_value, d_value, pretend=pretend) if res == 1: write_message(" Failed: Error during update_bibfmt", verbose=1, stream=sys.stderr) # If we are in format mode, we only care about the FMT tag if opt_mode == 'format': return 0 # We delete the FMT Tag of the record record_delete_field(record, 'FMT') write_message(" -Delete field FMT from record : DONE", verbose=2) return record elif opt_mode == 'format': write_message(" Failed: Format updated failed : No tag FMT found", verbose=1, stream=sys.stderr) return None else: return record ### Update functions def update_bibrec_modif_date(now, bibrec_id, pretend=False): """Update the date of the record in bibrec table """ query = """UPDATE bibrec SET modification_date=%s WHERE id=%s""" params = (now, bibrec_id) try: if not pretend: run_sql(query, params) write_message(" -Update record modification date : DONE" , verbose=2) except Error, error: write_message(" Error during update_bibrec_modif_date function : %s" % error, verbose=1, stream=sys.stderr) def update_bibfmt_format(id_bibrec, format_value, format_name, modification_date=None, pretend=False): """Update the format in the table bibfmt""" if modification_date is None: modification_date = time.strftime('%Y-%m-%d %H:%M:%S') else: try: time.strptime(modification_date, "%Y-%m-%d %H:%M:%S") 
except ValueError: modification_date = '1970-01-01 00:00:00' # We check if the format is already in bibFmt nb_found = find_record_format(id_bibrec, format_name) if nb_found == 1: # we are going to update the format # compress the format_value value pickled_format_value = compress(format_value) # update the format: query = """UPDATE LOW_PRIORITY bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s""" params = (modification_date, pickled_format_value, id_bibrec, format_name) try: if not pretend: row_id = run_sql(query, params) if not pretend and row_id is None: write_message(" Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Update the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 except Error, error: write_message(" Error during the update_bibfmt_format function : %s " % error, verbose=1, stream=sys.stderr) elif nb_found > 1: write_message(" Failed: Same format %s found several time in bibfmt for the same record." % format_name, verbose=1, stream=sys.stderr) return 1 else: # Insert the format information in BibFMT res = insert_bibfmt(id_bibrec, format_value, format_name, modification_date, pretend=pretend) if res is None: write_message(" Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Insert the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 def delete_bibfmt_format(id_bibrec, format_name, pretend=False): """ Delete format FORMAT_NAME from bibfmt table for record ID_BIBREC. """ if not pretend: run_sql("DELETE LOW_PRIORITY FROM bibfmt WHERE id_bibrec=%s and format=%s", (id_bibrec, format_name)) return 0 def archive_marcxml_for_history(recID, pretend=False): """ Archive current MARCXML format of record RECID from BIBFMT table into hstRECORD table. Useful to keep MARCXML history of records. Return 0 if everything went fine. Return 1 otherwise. 
""" try: res = run_sql("SELECT id_bibrec, value, last_updated FROM bibfmt WHERE format='xm' AND id_bibrec=%s", (recID,)) if res and not pretend: run_sql("""INSERT INTO hstRECORD (id_bibrec, marcxml, job_id, job_name, job_person, job_date, job_details) VALUES (%s,%s,%s,%s,%s,%s,%s)""", (res[0][0], res[0][1], task_get_task_param('task_id', 0), 'bibupload', task_get_task_param('user','UNKNOWN'), res[0][2], 'mode: ' + task_get_option('mode','UNKNOWN') + '; file: ' + task_get_option('file_path','UNKNOWN') + '.')) except Error, error: write_message(" Error during archive_marcxml_for_history: %s " % error, verbose=1, stream=sys.stderr) return 1 return 0 def update_database_with_metadata(record, rec_id, oai_rec_id = "oai", pretend=False): """Update the database tables with the record and the record id given in parameter""" for tag in record.keys(): # check if tag is not a special one: if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each tag there is a list of tuples representing datafields tuple_list = record[tag] # this list should contain the elements of a full tag [tag, ind1, ind2, subfield_code] tag_list = [] tag_list.append(tag) for single_tuple in tuple_list: # these are the contents of a single tuple subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # append the ind's to the full tag if ind1 == '' or ind1 == ' ': tag_list.append('_') else: tag_list.append(ind1) if ind2 == '' or ind2 == ' ': tag_list.append('_') else: tag_list.append(ind2) datafield_number = single_tuple[4] if tag in CFG_BIBUPLOAD_SPECIAL_TAGS: # nothing to do for special tags (FFT, FMT) pass elif tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001": value = single_tuple[3] # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend) #print 'tname, 
bibrow', table_name, bibxxx_row_id; if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) else: # get the tag and value from the content of each subfield for subfield in subfield_list: subtag = subfield[0] value = subfield[1] tag_list.append(subtag) # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend) if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) # remove the subtag from the list tag_list.pop() tag_list.pop() tag_list.pop() tag_list.pop() write_message(" -Update the database with metadata : DONE", verbose=2) log_record_uploading(oai_rec_id, task_get_task_param('task_id', 0), rec_id, 'P', pretend=pretend) def append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode): """Append new tags to a old record""" def _append_tag(tag): # Reference mode append only reference tag if opt_mode == 'reference': if tag == CFG_BIBUPLOAD_REFERENCE_TAG: for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # We add the datafield to the old record write_message(" 
Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9) newfield_number = record_add_field(rec_old, tag, ind1, ind2, subfields=subfield_list) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS: if tag == '001': pass else: # if it is a controlfield,just access the value for single_tuple in record[tag]: controlfield_value = single_tuple[3] # add the field to the old record newfield_number = record_add_field(rec_old, tag, controlfield_value=controlfield_value) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: # For each tag there is a list of tuples representing datafields for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] if '%s%s%s' % (tag, ind1 == ' ' and '_' or ind1, ind2 == ' ' and '_' or ind2) in (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[:5]): ## We don't want to append the external identifier ## if it is already existing. if record_find_field(rec_old, tag, single_tuple)[0] is not None: write_message(" Not adding tag: %s ind1=%s ind2=%s subfields=%s: it's already there" % (tag, ind1, ind2, subfield_list), verbose=9) continue # We add the datafield to the old record write_message(" Adding tag: %s ind1=%s ind2=%s subfields=%s" % (tag, ind1, ind2, subfield_list), verbose=9) newfield_number = record_add_field(rec_old, tag, ind1, ind2, subfields=subfield_list) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) if opt_tag is not None: _append_tag(opt_tag) else: # Go through each tag in the appended record for tag in record: _append_tag(tag) return rec_old def copy_strong_tags_from_old_record(record, rec_old): """ Look for strong tags in RECORD and REC_OLD. 
If no strong tags are found in RECORD, then copy them over from REC_OLD. This function modifies RECORD structure on the spot. """ for strong_tag in CFG_BIBUPLOAD_STRONG_TAGS: if not record_get_field_instances(record, strong_tag): strong_tag_old_field_instances = record_get_field_instances(rec_old, strong_tag) if strong_tag_old_field_instances: for strong_tag_old_field_instance in strong_tag_old_field_instances: sf_vals, fi_ind1, fi_ind2, controlfield, dummy = strong_tag_old_field_instance record_add_field(record, strong_tag, fi_ind1, fi_ind2, controlfield, sf_vals) return ### Delete functions def delete_tags(record, rec_old): """ Returns a record structure with all the fields in rec_old minus the fields in record. @param record: The record containing tags to delete. @type record: record structure @param rec_old: The original record. @type rec_old: record structure @return: The modified record. @rtype: record structure """ returned_record = copy.deepcopy(rec_old) for tag, fields in record.iteritems(): if tag in ('001', ): continue for field in fields: local_position = record_find_field(returned_record, tag, field)[1] if local_position is not None: record_delete_field(returned_record, tag, field_position_local=local_position) return returned_record def delete_tags_to_correct(record, rec_old, opt_tag): """ Delete tags from REC_OLD which are also existing in RECORD. When deleting, pay attention not only to tags, but also to indicators, so that fields with the same tags but different indicators are not deleted. """ ## Some fields are controlled via provenance information. ## We should re-add saved fields at the end. 
fields_to_readd = {} for tag in CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS: if tag[:3] in record: tmp_field_instances = record_get_field_instances(record, tag[:3], tag[3], tag[4]) ## Let's discover the provenance that will be updated provenances_to_update = [] for instance in tmp_field_instances: for code, value in instance[0]: if code == tag[5]: if value not in provenances_to_update: provenances_to_update.append(value) break else: ## The provenance is not specified. ## let's add the special empty provenance. if '' not in provenances_to_update: provenances_to_update.append('') potential_fields_to_readd = record_get_field_instances(rec_old, tag[:3], tag[3], tag[4]) ## Let's take all the field corresponding to tag ## Let's save apart all the fields that should be updated, but ## since they have a different provenance not mentioned in record ## they should be preserved. fields = [] for sf_vals, ind1, ind2, dummy_cf, dummy_line in potential_fields_to_readd: for code, value in sf_vals: if code == tag[5]: if value not in provenances_to_update: fields.append(sf_vals) break else: if '' not in provenances_to_update: ## Empty provenance, let's protect in any case fields.append(sf_vals) fields_to_readd[tag] = fields # browse through all the tags from the MARCXML file: for tag in record: # do we have to delete only a special tag or any tag? if opt_tag is None or opt_tag == tag: # check if the tag exists in the old record too: if tag in rec_old and tag != '001': # the tag does exist, so delete all record's tag+ind1+ind2 combinations from rec_old for dummy_sf_vals, ind1, ind2, dummy_cf, field_number in record[tag]: write_message(" Delete tag: " + tag + " ind1=" + ind1 + " ind2=" + ind2, verbose=9) record_delete_field(rec_old, tag, ind1, ind2) ## Ok, we readd necessary fields! 
for tag, fields in fields_to_readd.iteritems(): for sf_vals in fields: write_message(" Adding tag: " + tag[:3] + " ind1=" + tag[3] + " ind2=" + tag[4] + " code=" + str(sf_vals), verbose=9) record_add_field(rec_old, tag[:3], tag[3], tag[4], subfields=sf_vals) def delete_bibrec_bibxxx(record, id_bibrec, pretend=False): """Delete the database record from the table bibxxx given in parameters""" # we clear all the rows from bibrec_bibxxx from the old record for tag in record.keys(): if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each name construct the bibrec_bibxxx table name table_name = 'bibrec_bib'+tag[0:2]+'x' # delete all the records with proper id_bibrec query = """DELETE FROM `%s` where id_bibrec = %s""" params = (table_name, id_bibrec) if not pretend: try: run_sql(query % params) except Error, error: write_message(" Error during the delete_bibrec_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) def main(): """Main that construct all the bibtask.""" task_init(authorization_action='runbibupload', authorization_msg="BibUpload Task Submission", description="""Receive MARC XML file and update appropriate database tables according to options. Examples: $ bibupload -i input.xml """, help_specific_usage=""" -a, --append\t\tnew fields are appended to the existing record -c, --correct\t\tfields are replaced by the new ones in the existing record, except \t\t\twhen overridden by CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -f, --format\t\ttakes only the FMT fields into account. 
Does not update -i, --insert\t\tinsert the new record in the database -r, --replace\t\tthe existing record is entirely replaced by the new one, \t\t\texcept for fields in CFG_BIBUPLOAD_STRONG_TAGS -z, --reference\tupdate references (update only 999 fields) -d, --delete\t\tspecified fields are deleted in existing record -S, --stage=STAGE\tstage to start from in the algorithm (0: always done; 1: FMT tags; \t\t\t2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange\tdo not change record last modification date when updating -o, --holdingpen\tInsert record into holding pen instead of the normal database --pretend\t\tdo not really insert/append/correct/replace the input file --force\t\twhen --replace, use provided 001 tag values, even if the matching \t\t\trecord does not exist (thus allocating it on-the-fly) --callback-url\tSend via a POST request a JSON-serialized answer (see admin guide), in \t\t\torder to provide a feedback to an external service about the outcome of the operation. --nonce\t\twhen used together with --callback add the nonce value in the JSON message. --special-treatment=MODE\tif "oracle" is specified, when used together with --callback_url, \t\t\tPOST an application/x-www-form-urlencoded request where the JSON message is encoded \t\t\tinside a form field called "results". """, version=__revision__, specific_params=("ircazdS:fno", [ "insert", "replace", "correct", "append", "reference", "delete", "stage=", "format", "notimechange", "holdingpen", "pretend", "force", "callback-url=", "nonce=", "special-treatment=", ]), task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. 
eg: if key in ['-n', '--number']: task_get_option(\1) = value return True return False """ # No time change option if key in ("-n", "--notimechange"): task_set_option('notimechange', 1) # Insert mode option elif key in ("-i", "--insert"): if task_get_option('mode') == 'replace': # if also replace found, then set to replace_or_insert task_set_option('mode', 'replace_or_insert') else: task_set_option('mode', 'insert') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Replace mode option elif key in ("-r", "--replace"): if task_get_option('mode') == 'insert': # if also insert found, then set to replace_or_insert task_set_option('mode', 'replace_or_insert') else: task_set_option('mode', 'replace') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Holding pen mode option elif key in ("-o", "--holdingpen"): write_message("Holding pen mode", verbose=3) task_set_option('mode', 'holdingpen') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Correct mode option elif key in ("-c", "--correct"): task_set_option('mode', 'correct') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Append mode option elif key in ("-a", "--append"): task_set_option('mode', 'append') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Reference mode option elif key in ("-z", "--reference"): task_set_option('mode', 'reference') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("-d", "--delete"): task_set_option('mode', 'delete') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Format mode option elif key in ("-f", "--format"): task_set_option('mode', 'format') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("--pretend",): task_set_option('pretend', True) fix_argv_paths([args[0]]) task_set_option('file_path', 
os.path.abspath(args[0])) elif key in ("--force",): task_set_option('force', True) fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Stage elif key in ("-S", "--stage"): try: value = int(value) except ValueError: print >> sys.stderr, """The value specified for --stage must be a valid integer, not %s""" % value return False if not (0 <= value <= 5): print >> sys.stderr, """The value specified for --stage must be comprised between 0 and 5""" return False task_set_option('stage_to_start_from', value) elif key in ("--callback-url", ): task_set_option('callback_url', value) elif key in ("--nonce", ): task_set_option('nonce', value) elif key in ("--special-treatment", ): if value.lower() in CFG_BIBUPLOAD_ALLOWED_SPECIAL_TREATMENTS: if value.lower() == 'oracle': task_set_option('oracle_friendly', True) else: print >> sys.stderr, """The specified value is not in the list of allowed special treatments codes: %s""" % CFG_BIBUPLOAD_ALLOWED_SPECIAL_TREATMENTS return False else: return False return True def task_submit_check_options(): """ Reimplement this method for having the possibility to check options before submitting the task, in order for example to provide default values. It must return False if there are errors in the options. """ if task_get_option('mode') is None: write_message("Please specify at least one update/insert mode!") return False if task_get_option('file_path') is None: write_message("Missing filename! 
-h for help.") return False return True def writing_rights_p(): """Return True in case bibupload has the proper rights to write in the fulltext file folder.""" global _WRITING_RIGHTS if _WRITING_RIGHTS is not None: return _WRITING_RIGHTS try: - if not os.path.exists(CFG_WEBSUBMIT_FILEDIR): - os.makedirs(CFG_WEBSUBMIT_FILEDIR) - fd, filename = tempfile.mkstemp(suffix='.txt', prefix='test', dir=CFG_WEBSUBMIT_FILEDIR) + if not os.path.exists(CFG_BIBDOCFILE_FILEDIR): + os.makedirs(CFG_BIBDOCFILE_FILEDIR) + fd, filename = tempfile.mkstemp(suffix='.txt', prefix='test', dir=CFG_BIBDOCFILE_FILEDIR) test = os.fdopen(fd, 'w') test.write('TEST') test.close() if open(filename).read() != 'TEST': raise IOError("Can not successfully write and readback %s" % filename) os.remove(filename) except: register_exception(alert_admin=True) return False return True def post_results_to_callback_url(results, callback_url): if not CFG_JSON_AVAILABLE: from warnings import warn warn("--callback-url used but simplejson/json not available") return json_results = json.dumps(results) ## :///?# scheme, netloc, path, query, fragment = urlparse.urlsplit(callback_url) ## See: http://stackoverflow.com/questions/111945/is-there-any-way-to-do-http-put-in-python if scheme == 'http': opener = urllib2.build_opener(urllib2.HTTPHandler) elif scheme == 'https': opener = urllib2.build_opener(urllib2.HTTPSHandler) else: raise ValueError("Scheme not handled %s for callback_url %s" % (scheme, callback_url)) if task_get_option('oracle_friendly'): request = urllib2.Request(callback_url, data=urllib.urlencode({'results': json_results})) request.add_header('User-Agent', make_user_agent_string('BibUpload')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') else: request = urllib2.Request(callback_url, data=json_results) request.add_header('Content-Type', 'application/json') request.get_method = lambda: 'POST' request.add_header('User-Agent', make_user_agent_string('BibUpload')) return 
opener.open(request) def task_run_core(): """ Reimplement to add the body of the task.""" error = 0 write_message("Input file '%s', input mode '%s'." % (task_get_option('file_path'), task_get_option('mode'))) write_message("STAGE 0:", verbose=2) if task_get_option('file_path') is not None: write_message("start preocessing", verbose=3) task_update_progress("Reading XML input") recs = xml_marc_to_records(open_marc_file(task_get_option('file_path'))) stat['nb_records_to_upload'] = len(recs) write_message(" -Open XML marc: DONE", verbose=2) task_sleep_now_if_required(can_stop_too=True) write_message("Entering records loop", verbose=3) callback_url = task_get_option('callback_url') results_for_callback = {'results': []} if recs is not None: # We proceed each record by record for record in recs: record_id = record_extract_oai_id(record) task_sleep_now_if_required(can_stop_too=True) if task_get_option("mode") == "holdingpen": #inserting into the holding pen write_message("Inserting into holding pen", verbose=3) insert_record_into_holding_pen(record, record_id) else: write_message("Inserting into main database", verbose=3) error = bibupload( record, opt_tag=task_get_option('tag'), opt_mode=task_get_option('mode'), opt_stage_to_start_from=task_get_option('stage_to_start_from'), opt_notimechange=task_get_option('notimechange'), oai_rec_id=record_id, pretend=task_get_option('pretend')) if error[0] == 1: if record: write_message(record_xml_output(record), stream=sys.stderr) else: write_message("Record could not have been parsed", stream=sys.stderr) stat['nb_errors'] += 1 if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) elif error[0] == 2: if record: write_message(record_xml_output(record), stream=sys.stderr) else: write_message("Record could not have been parsed", stream=sys.stderr) if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) 
elif error[0] == 0: if callback_url: from invenio.search_engine import print_record results_for_callback['results'].append({'recid': error[1], 'success': True, "marcxml": print_record(error[1], 'xm'), 'url': "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, error[1])}) else: if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) task_update_progress("Done %d out of %d." % \ (stat['nb_records_inserted'] + \ stat['nb_records_updated'], stat['nb_records_to_upload'])) else: write_message(" Error bibupload failed: No record found", verbose=1, stream=sys.stderr) callback_url = task_get_option("callback_url") if callback_url: nonce = task_get_option("nonce") if nonce: results_for_callback["nonce"] = nonce post_results_to_callback_url(results_for_callback, callback_url) if task_get_task_param('verbose') >= 1: # Print out the statistics print_out_bibupload_statistics() # Check if they were errors return not stat['nb_errors'] >= 1 def log_record_uploading(oai_rec_id, task_id, bibrec_id, insertion_db, pretend=False): if oai_rec_id != "" and oai_rec_id != None: query = """UPDATE oaiHARVESTLOG SET date_inserted=NOW(), inserted_to_db=%s, id_bibrec=%s WHERE oai_id = %s AND bibupload_task_id = %s ORDER BY date_harvested LIMIT 1""" try: if not pretend: run_sql(query, (str(insertion_db), str(bibrec_id), str(oai_rec_id), str(task_id), )) except Error, error: write_message(" Error during the log_record_uploading function : %s " % error, verbose=1, stream=sys.stderr) if __name__ == "__main__": main() diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py index a0bde816e..a77a83a54 100644 --- a/modules/miscutil/lib/inveniocfg.py +++ b/modules/miscutil/lib/inveniocfg.py @@ -1,1377 +1,1402 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio configuration and administration CLI tool. Usage: inveniocfg [options] General options: -h, --help print this help -V, --version print version number Options to finish your installation: --create-apache-conf create Apache configuration files --create-tables create DB tables for Invenio --load-webstat-conf load the WebStat configuration --drop-tables drop DB tables of Invenio --check-openoffice check for correctly set up of openoffice temporary directory Options to set up and test a demo site: --create-demo-site create demo site --load-demo-records load demo records --remove-demo-records remove demo records, keeping demo site --drop-demo-site drop demo site configurations too --run-unit-tests run unit test suite (needs demo site) --run-regression-tests run regression test suite (needs demo site) --run-web-tests run web tests in a browser (needs demo site, Firefox, Selenium IDE) Options to update config files in situ: --update-all perform all the update options --update-config-py update config.py file from invenio.conf file --update-dbquery-py update dbquery.py with DB credentials from invenio.conf --update-dbexec update dbexec with DB credentials from invenio.conf --update-bibconvert-tpl update bibconvert templates with CFG_SITE_URL from invenio.conf 
--update-web-tests update web test cases with CFG_SITE_URL from invenio.conf Options to update DB tables: --reset-all perform all the reset options --reset-sitename reset tables to take account of new CFG_SITE_NAME* --reset-siteadminemail reset tables to take account of new CFG_SITE_ADMIN_EMAIL --reset-fieldnames reset tables to take account of new I18N names from PO files --reset-recstruct-cache reset record structure cache according to CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE Options to help the work: --list print names and values of all options from conf files --get get value of a given option from conf files --conf-dir path to directory where invenio*.conf files are [optional] --detect-system-details print system details such as Apache/Python/MySQL versions """ __revision__ = "$Id$" from ConfigParser import ConfigParser import os import re import shutil import socket import sys def print_usage(): """Print help.""" print __doc__ def print_version(): """Print version information.""" print __revision__ def convert_conf_option(option_name, option_value): """ Convert conf option into Python config.py line, converting values to ints or strings as appropriate. """ ## 1) convert option name to uppercase: option_name = option_name.upper() + ## 1a) adjust renamed variables: + if option_name in ['CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC', + 'CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT', + 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', + 'CFG_WEBSUBMIT_DESIRED_CONVERSIONS']: + new_option_name = option_name.replace('WEBSUBMIT', 'BIBDOCFILE') + print >> sys.stderr, ("""WARNING: %s has been renamed to %s. 
+Please, update your invenio-local.conf file accordingly.""" % (option_name, new_option_name)) + option_name = new_option_name + + ## 2) convert option value to int or string: if option_name in ['CFG_BIBUPLOAD_REFERENCE_TAG', 'CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS',]: # some options are supposed be string even when they look like # numeric option_value = '"' + option_value + '"' else: try: option_value = int(option_value) except ValueError: option_value = '"' + option_value + '"' ## 3a) special cases: chars regexps if option_name in ['CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS', 'CFG_BIBINDEX_CHARS_PUNCTUATION']: option_value = 'r"[' + option_value[1:-1] + ']"' ## 3abis) special cases: real regexps if option_name in ['CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES', 'CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS']: option_value = 'r"' + option_value[1:-1] + '"' ## 3b) special cases: True, False, None if option_value in ['"True"', '"False"', '"None"']: option_value = option_value[1:-1] ## 3c) special cases: dicts and real pythonic lists if option_name in ['CFG_WEBSEARCH_FIELDS_CONVERT', 'CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS', 'CFG_WEBSEARCH_FULLTEXT_SNIPPETS', 'CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS', 'CFG_SITE_EMERGENCY_EMAIL_ADDRESSES', 'CFG_BIBMATCH_FUZZY_WORDLIMITS', 'CFG_BIBMATCH_QUERY_TEMPLATES', 'CFG_WEBSEARCH_SYNONYM_KBRS', 'CFG_BIBINDEX_SYNONYM_KBRS', 'CFG_WEBCOMMENT_EMAIL_REPLIES_TO', 'CFG_WEBCOMMENT_RESTRICTION_DATAFIELD', 'CFG_WEBCOMMENT_ROUND_DATAFIELD', 'CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS', 'CFG_BIBSCHED_NODE_TASKS', 'CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE', 'CFG_OAI_METADATA_FORMATS', - 'CFG_WEBSUBMIT_DESIRED_CONVERSIONS', + 'CFG_BIBDOCFILE_DESIRED_CONVERSIONS', 'CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM', 'CFG_WEB_API_KEY_ALLOWED_URL', - 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC', - 
'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES', - 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS']: + 'CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_MISC', + 'CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_DOCTYPES', + 'CFG_BIBDOCFILE_DOCUMENT_FILE_MANAGER_RESTRICTIONS']: try: option_value = option_value[1:-1] except TypeError: if option_name in ('CFG_WEBSEARCH_FULLTEXT_SNIPPETS',): print >> sys.stderr, """WARNING: CFG_WEBSEARCH_FULLTEXT_SNIPPETS has changed syntax: it can be customised to display different snippets for different document types. See the corresponding documentation in invenio.conf. You may want to customise your invenio-local.conf configuration accordingly.""" option_value = """{'': %s}""" % option_value else: print >> sys.stderr, "ERROR: type error in %s value %s." % \ (option_name, option_value) sys.exit(1) ## 3cbis) very special cases: dicts with backward compatible string if option_name in ['CFG_BIBINDEX_SPLASH_PAGES']: if option_value.startswith('"{') and option_value.endswith('}"'): option_value = option_value[1:-1] else: option_value = """{%s: ".*"}""" % option_value ## 3d) special cases: comma-separated lists if option_name in ['CFG_SITE_LANGS', - 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', + 'CFG_BIBDOCFILE_ADDITIONAL_KNOWN_FILE_EXTENSIONS', 'CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS', 'CFG_BIBSCHED_GC_TASKS_TO_REMOVE', 'CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE', 'CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS', 'CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS', 'CFG_BIBUPLOAD_DELETE_FORMATS', 'CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES', 'CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST', 'CFG_WEBSEARCH_RSS_I18N_COLLECTIONS', 'CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY', 'CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY', 'CFG_PLOTEXTRACTOR_DISALLOWED_TEX', 'CFG_OAI_FRIENDS', 'CFG_WEBSTYLE_REVERSE_PROXY_IPS', 'CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS', 'CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS', 
'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS',]: out = "[" for elem in option_value[1:-1].split(","): if elem: if option_name in ['CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES']: # 3d1) integer values out += "%i, " % int(elem) else: # 3d2) string values out += "'%s', " % elem out += "]" option_value = out ## 3e) special cases: multiline if option_name == 'CFG_OAI_IDENTIFY_DESCRIPTION': # make triple quotes option_value = '""' + option_value + '""' ## 3f) ignore some options: if option_name.startswith('CFG_SITE_NAME_INTL'): # treated elsewhere return ## 3g) special cases: float if option_name in ['CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY', 'CFG_BIBMATCH_LOCAL_SLEEPTIME', 'CFG_BIBMATCH_REMOTE_SLEEPTIME', 'CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT', 'CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT']: option_value = float(option_value[1:-1]) ## 3h) special cases: bibmatch validation list if option_name in ['CFG_BIBMATCH_MATCH_VALIDATION_RULESETS']: option_value = option_value[1:-1] ## 4a) dropped variables if option_name in ['CFG_BATCHUPLOADER_WEB_ROBOT_AGENT']: print >> sys.stderr, ("""ERROR: CFG_BATCHUPLOADER_WEB_ROBOT_AGENT has been dropped in favour of CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS. Please, update your invenio-local.conf file accordingly.""") option_value = option_value[1:-1] + elif option_name in ['CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC', + 'CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT', + 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', + 'CFG_WEBSUBMIT_DESIRED_CONVERSIONS']: + new_option_name = option_name.replace('WEBSUBMIT', 'BIBDOCFILE') + print >> sys.stderr, ("""ERROR: %s has been renamed to %s. 
+Please, update your invenio-local.conf file accordingly.""" % (option_name, new_option_name)) + option_name = new_option_name + + ## 5) finally, return output line: return '%s = %s' % (option_name, option_value) def cli_cmd_update_config_py(conf): """ Update new config.py from conf options, keeping previous config.py in a backup copy. """ print ">>> Going to update config.py..." ## location where config.py is: configpyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'config.py' ## backup current config.py file: if os.path.exists(configpyfile): shutil.copy(configpyfile, configpyfile + '.OLD') ## here we go: fdesc = open(configpyfile, 'w') ## generate preamble: fdesc.write("# -*- coding: utf-8 -*-\n") fdesc.write("# DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED\n") fdesc.write("# FROM INVENIO.CONF BY EXECUTING:\n") fdesc.write("# " + " ".join(sys.argv) + "\n") ## special treatment for CFG_SITE_NAME_INTL options: fdesc.write("CFG_SITE_NAME_INTL = {}\n") for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): fdesc.write("CFG_SITE_NAME_INTL['%s'] = \"%s\"\n" % (lang, conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang))) ## special treatment for CFG_SITE_SECURE_URL that may be empty, in ## which case it should be put equal to CFG_SITE_URL: if not conf.get("Invenio", "CFG_SITE_SECURE_URL"): conf.set("Invenio", "CFG_SITE_SECURE_URL", conf.get("Invenio", "CFG_SITE_URL")) ## process all the options normally: sections = conf.sections() sections.sort() for section in sections: options = conf.options(section) options.sort() for option in options: if not option.startswith('CFG_DATABASE_'): # put all options except for db credentials into config.py line_out = convert_conf_option(option, conf.get(section, option)) if line_out: fdesc.write(line_out + "\n") ## FIXME: special treatment for experimental variables ## CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES and CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE ## (not offering them in invenio.conf since 
they will be refactored) fdesc.write("CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE = 0\n") fdesc.write("CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = [0, 1,]\n") ## generate postamble: fdesc.write("") fdesc.write("# END OF GENERATED FILE") ## we are done: fdesc.close() print "You may want to restart Apache now." print ">>> config.py updated successfully." def cli_cmd_update_dbquery_py(conf): """ Update lib/dbquery.py file with DB parameters read from conf file. Note: this edits dbquery.py in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbquery.py..." ## location where dbquery.py is: dbquerypyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'dbquery.py' ## backup current dbquery.py file: if os.path.exists(dbquerypyfile): shutil.copy(dbquerypyfile, dbquerypyfile + '.OLD') ## replace db parameters: out = '' for line in open(dbquerypyfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS|SLAVE)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get('Invenio', dbparam)) else: out += line fdesc = open(dbquerypyfile, 'w') fdesc.write(out) fdesc.close() print "You may want to restart Apache now." print ">>> dbquery.py updated successfully." def cli_cmd_update_dbexec(conf): """ Update bin/dbexec file with DB parameters read from conf file. Note: this edits dbexec in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbexec..." 
## location where dbexec is: dbexecfile = conf.get("Invenio", "CFG_BINDIR") + \ os.sep + 'dbexec' ## backup current dbexec file: if os.path.exists(dbexecfile): shutil.copy(dbexecfile, dbexecfile + '.OLD') ## replace db parameters via sed: out = '' for line in open(dbexecfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS|SLAVE)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get("Invenio", dbparam)) else: out += line fdesc = open(dbexecfile, 'w') fdesc.write(out) fdesc.close() print ">>> dbexec updated successfully." def cli_cmd_update_bibconvert_tpl(conf): """ Update bibconvert/config/*.tpl files looking for 856 http://.../CFG_SITE_RECORD lines, replacing URL with CFG_SITE_URL taken from conf file. Note: this edits tpl files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update bibconvert templates..." ## location where bibconvert/config/*.tpl are: tpldir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'bibconvert' + os.sep + 'config' ## find all *.tpl files: for tplfilename in os.listdir(tpldir): if tplfilename.endswith(".tpl"): ## change tpl file: tplfile = tpldir + os.sep + tplfilename shutil.copy(tplfile, tplfile + '.OLD') out = '' for line in open(tplfile, 'r').readlines(): match = re.search(r'^(.*)http://.*?/%s/(.*)$' % conf.get("Invenio", 'CFG_SITE_RECORD'), line) if match: out += "%s%s/%s/%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), conf.get("Invenio", 'CFG_SITE_RECORD'), match.group(2)) else: out += line fdesc = open(tplfile, 'w') fdesc.write(out) fdesc.close() print ">>> bibconvert templates updated successfully." def cli_cmd_update_web_tests(conf): """ Update web test cases lib/webtest/test_*.html looking for http://.+?[>> Going to update web tests..." 
## location where test_*.html files are: testdir = conf.get("Invenio", 'CFG_PREFIX') + os.sep + \ 'lib' + os.sep + 'webtest' + os.sep + 'invenio' ## find all test_*.html files: for testfilename in os.listdir(testdir): if testfilename.startswith("test_") and \ testfilename.endswith(".html"): ## change test file: testfile = testdir + os.sep + testfilename shutil.copy(testfile, testfile + '.OLD') out = '' for line in open(testfile, 'r').readlines(): match = re.search(r'^(.*)http://.+?([)/opt/invenio(.*)$', line) if match: out += "%s%s%s\n" % (match.group(1), conf.get("Invenio", 'CFG_PREFIX'), match.group(2)) else: out += line fdesc = open(testfile, 'w') fdesc.write(out) fdesc.close() print ">>> web tests updated successfully." def cli_cmd_reset_sitename(conf): """ Reset collection-related tables with new CFG_SITE_NAME and CFG_SITE_NAME_INTL* read from conf files. """ print ">>> Going to reset CFG_SITE_NAME and CFG_SITE_NAME_INTL..." from invenio.dbquery import run_sql, IntegrityError # reset CFG_SITE_NAME: sitename = conf.get("Invenio", "CFG_SITE_NAME") try: run_sql("""INSERT INTO collection (id, name, dbquery, reclist) VALUES (1,%s,NULL,NULL)""", (sitename,)) except IntegrityError: run_sql("""UPDATE collection SET name=%s WHERE id=1""", (sitename,)) # reset CFG_SITE_NAME_INTL: for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): sitename_lang = conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang) try: run_sql("""INSERT INTO collectionname (id_collection, ln, type, value) VALUES (%s,%s,%s,%s)""", (1, lang, 'ln', sitename_lang)) except IntegrityError: run_sql("""UPDATE collectionname SET value=%s WHERE ln=%s AND id_collection=1 AND type='ln'""", (sitename_lang, lang)) print "You may want to restart Apache now." print ">>> CFG_SITE_NAME and CFG_SITE_NAME_INTL* reset successfully." 
def cli_cmd_reset_recstruct_cache(conf): """If CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE is changed, this function will adapt the database to either store or not store the recstruct format.""" from invenio.intbitset import intbitset from invenio.dbquery import run_sql, serialize_via_marshal from invenio.search_engine import get_record from invenio.bibsched import server_pid, pidfile enable_recstruct_cache = conf.get("Invenio", "CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE") enable_recstruct_cache = enable_recstruct_cache in ('True', '1') pid = server_pid(ping_the_process=False) if pid: print >> sys.stderr, "ERROR: bibsched seems to run with pid %d, according to %s." % (pid, pidfile) print >> sys.stderr, " Please stop bibsched before running this procedure." sys.exit(1) if enable_recstruct_cache: print ">>> Searching records which need recstruct cache resetting; this may take a while..." all_recids = intbitset(run_sql("SELECT id FROM bibrec")) good_recids = intbitset(run_sql("SELECT bibrec.id FROM bibrec JOIN bibfmt ON bibrec.id = bibfmt.id_bibrec WHERE format='recstruct' AND modification_date < last_updated")) recids = all_recids - good_recids print ">>> Generating recstruct cache..." tot = len(recids) count = 0 for recid in recids: value = serialize_via_marshal(get_record(recid)) run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s AND format='recstruct'", (recid, )) run_sql("INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, 'recstruct', NOW(), %s)", (recid, value)) count += 1 if count % 1000 == 0: print " ... done records %s/%s" % (count, tot) if count % 1000 != 0: print " ... done records %s/%s" % (count, tot) print ">>> recstruct cache generated successfully." else: print ">>> Cleaning recstruct cache..." run_sql("DELETE FROM bibfmt WHERE format='recstruct'") def cli_cmd_reset_siteadminemail(conf): """ Reset user-related tables with new CFG_SITE_ADMIN_EMAIL read from conf files. """ print ">>> Going to reset CFG_SITE_ADMIN_EMAIL..." 
    from invenio.dbquery import run_sql
    siteadminemail = conf.get("Invenio", "CFG_SITE_ADMIN_EMAIL")
    # recreate the admin user (id=1) from scratch with the configured
    # email; the password is (re)set via AES_ENCRYPT on the DB side:
    run_sql("DELETE FROM user WHERE id=1")
    run_sql("""INSERT INTO user (id, email, password, note, nickname) VALUES
                        (1, %s, AES_ENCRYPT(email, ''), 1, 'admin')""",
            (siteadminemail,))
    print "You may want to restart Apache now."
    print ">>> CFG_SITE_ADMIN_EMAIL reset successfully."

def cli_cmd_reset_fieldnames(conf):
    """
    Reset I18N field names such as author, title, etc and other I18N
    ranking method names such as word similarity.  Their translations
    are taken from the PO files.
    """
    print ">>> Going to reset I18N field names..."
    from invenio.messages import gettext_set_language, language_list_long
    from invenio.dbquery import run_sql, IntegrityError
    ## get field id and name list:
    field_id_name_list = run_sql("SELECT id, name FROM field")
    ## get rankmethod id and name list:
    rankmethod_id_name_list = run_sql("SELECT id, name FROM rnkMETHOD")
    ## update names for every language:
    for lang, dummy in language_list_long():
        _ = gettext_set_language(lang)
        ## this list is put here in order for PO system to pick names
        ## suitable for translation
        field_name_names = {"any field": _("any field"),
                            "title": _("title"),
                            "author": _("author"),
                            "abstract": _("abstract"),
                            "keyword": _("keyword"),
                            "report number": _("report number"),
                            "subject": _("subject"),
                            "reference": _("reference"),
                            "fulltext": _("fulltext"),
                            "collection": _("collection"),
                            "division": _("division"),
                            "year": _("year"),
                            "journal": _("journal"),
                            "experiment": _("experiment"),
                            "record ID": _("record ID")}
        ## update I18N names for every language: INSERT first, fall back
        ## to UPDATE when the (field, lang) row already exists:
        for (field_id, field_name) in field_id_name_list:
            if field_name_names.has_key(field_name):
                try:
                    run_sql("""INSERT INTO fieldname (id_field,ln,type,value) VALUES
                                (%s,%s,%s,%s)""", (field_id, lang, 'ln',
                                                   field_name_names[field_name]))
                except IntegrityError:
                    run_sql("""UPDATE fieldname SET value=%s
                                WHERE id_field=%s AND ln=%s AND type=%s""",
                            (field_name_names[field_name], field_id, lang, 'ln',))
## ditto for rank methods: rankmethod_name_names = {"wrd": _("word similarity"), "demo_jif": _("journal impact factor"), "citation": _("times cited"), "citerank_citation_t": _("time-decay cite count"), "citerank_pagerank_c": _("all-time-best cite rank"), "citerank_pagerank_t": _("time-decay cite rank"),} for (rankmethod_id, rankmethod_name) in rankmethod_id_name_list: if rankmethod_name_names.has_key(rankmethod_name): try: run_sql("""INSERT INTO rnkMETHODNAME (id_rnkMETHOD,ln,type,value) VALUES (%s,%s,%s,%s)""", (rankmethod_id, lang, 'ln', rankmethod_name_names[rankmethod_name])) except IntegrityError: run_sql("""UPDATE rnkMETHODNAME SET value=%s WHERE id_rnkMETHOD=%s AND ln=%s AND type=%s""", (rankmethod_name_names[rankmethod_name], rankmethod_id, lang, 'ln',)) print ">>> I18N field names reset successfully." def cli_check_openoffice(conf): """ If OpenOffice.org integration is enabled, checks whether the system is properly configured. """ from invenio.bibtask import check_running_process_user from invenio.websubmit_file_converter import can_unoconv, get_file_converter_logger logger = get_file_converter_logger() for handler in logger.handlers: logger.removeHandler(handler) check_running_process_user() print ">>> Checking if Libre/OpenOffice.org is correctly integrated...", sys.stdout.flush() if can_unoconv(True): print "ok" else: sys.exit(1) def test_db_connection(): """ Test DB connection, and if fails, advise user how to set it up. Useful to be called during table creation. """ print "Testing DB connection...", from invenio.textutils import wrap_text_in_a_box from invenio.dbquery import run_sql, Error ## first, test connection to the DB server: try: run_sql("SHOW TABLES") except Error, err: from invenio.dbquery import CFG_DATABASE_HOST, CFG_DATABASE_PORT, \ CFG_DATABASE_NAME, CFG_DATABASE_USER, CFG_DATABASE_PASS print wrap_text_in_a_box("""\ DATABASE CONNECTIVITY ERROR %(errno)d: %(errmsg)s.\n Perhaps you need to set up database and connection rights? 
If yes, then please login as MySQL admin user and run the following
commands now:

 $ mysql -h %(dbhost)s -P %(dbport)s -u root -p mysql
 mysql> CREATE DATABASE %(dbname)s DEFAULT CHARACTER SET utf8;
 mysql> GRANT ALL PRIVILEGES ON %(dbname)s.* TO %(dbuser)s@%(webhost)s IDENTIFIED BY '%(dbpass)s';
 mysql> QUIT

The values printed above were detected from your configuration. If they
are not right, then please edit your invenio-local.conf file and rerun
'inveniocfg --update-all' first.

If the problem is of different nature, then please inspect the above
error message and fix the problem before continuing.""" % \
              {'errno': err.args[0], 'errmsg': err.args[1],
               'dbname': CFG_DATABASE_NAME,
               'dbhost': CFG_DATABASE_HOST,
               'dbport': CFG_DATABASE_PORT,
               'dbuser': CFG_DATABASE_USER,
               'dbpass': CFG_DATABASE_PASS,
               # the GRANT host is the web host as seen by MySQL; for a
               # local DB this is simply 'localhost':
               'webhost': CFG_DATABASE_HOST == 'localhost' and 'localhost' or os.popen('hostname -f', 'r').read().strip(),
               })
        sys.exit(1)
    print "ok"
    ## second, test insert/select of a Unicode string to detect
    ## possible Python/MySQL/MySQLdb mis-setup:
    print "Testing Python/MySQL/MySQLdb UTF-8 chain...",
    try:
        try:
            beta_in_utf8 = "β" # Greek beta in UTF-8 is 0xCEB2
            run_sql("CREATE TABLE test__invenio__utf8 (x char(1), y varbinary(2)) DEFAULT CHARACTER SET utf8 ENGINE=MyISAM;")
            run_sql("INSERT INTO test__invenio__utf8 (x, y) VALUES (%s, %s)", (beta_in_utf8, beta_in_utf8))
            res = run_sql("SELECT x,y,HEX(x),HEX(y),LENGTH(x),LENGTH(y),CHAR_LENGTH(x),CHAR_LENGTH(y) FROM test__invenio__utf8")
            # the char(1) column must hold one UTF-8 character (2 bytes),
            # the varbinary(2) column the 2 raw bytes:
            assert res[0] == ('\xce\xb2', '\xce\xb2', 'CEB2', 'CEB2', 2L, 2L, 1L, 2L)
            run_sql("DROP TABLE test__invenio__utf8")
        except Exception, err:
            print wrap_text_in_a_box("""\
DATABASE RELATED ERROR %s\n
A problem was detected with the UTF-8 treatment in the chain between the
Python application, the MySQLdb connector, and the MySQL database.
You may perhaps have installed older versions of some prerequisite packages?\n
Please check the INSTALL file and please fix this problem before continuing.""" % err)
            sys.exit(1)
    finally:
        # always clean up the scratch table, even on failure:
        run_sql("DROP TABLE IF EXISTS test__invenio__utf8")
    print "ok"

def cli_cmd_create_tables(conf):
    """Create and fill Invenio DB tables.  Useful for the installation process."""
    print ">>> Going to create and fill tables..."
    from invenio.config import CFG_PREFIX
    test_db_connection()
    # table creation must precede table filling:
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabcreate.sql" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/dbexec < %s/lib/sql/invenio/tabfill.sql" % (CFG_PREFIX, CFG_PREFIX)]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    # propagate site name / admin email / field names from conf into the DB:
    cli_cmd_reset_sitename(conf)
    cli_cmd_reset_siteadminemail(conf)
    cli_cmd_reset_fieldnames(conf)
    for cmd in ["%s/bin/webaccessadmin -u admin -c -a" % CFG_PREFIX]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Tables created and filled successfully."

def cli_cmd_load_webstat_conf(conf):
    # Load the WebStat configuration via the webstatadmin CLI.
    print ">>> Going to load WebStat config..."
    from invenio.config import CFG_PREFIX
    cmd = "%s/bin/webstatadmin --load-config" % CFG_PREFIX
    if os.system(cmd):
        print "ERROR: failed execution of", cmd
        sys.exit(1)
    print ">>> WebStat config load successfully."

def cli_cmd_drop_tables(conf):
    """Drop Invenio DB tables.  Useful for the uninstallation process."""
    print ">>> Going to drop tables..."
    from invenio.config import CFG_PREFIX
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    from invenio.webstat import destroy_customevents
    # destructive: ask for explicit confirmation first
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables!"""))
    # custom webstat event tables are not in tabdrop.sql, drop them first:
    msg = destroy_customevents()
    if msg:
        print msg
    cmd = "%s/bin/dbexec < %s/lib/sql/invenio/tabdrop.sql" % (CFG_PREFIX, CFG_PREFIX)
    if os.system(cmd):
        print "ERROR: failed execution of", cmd
        sys.exit(1)
    print ">>> Tables dropped successfully."

def cli_cmd_create_demo_site(conf):
    """Create demo site.
    Useful for testing purposes."""
    print ">>> Going to create demo site..."
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    # clear pending tasks/sessions and stale guest users before loading:
    run_sql("TRUNCATE schTASK")
    run_sql("TRUNCATE session")
    run_sql("DELETE FROM user WHERE email=''")
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/democfgdata.sql" % \
                   (CFG_PREFIX, CFG_PREFIX),]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    cli_cmd_reset_fieldnames(conf) # needed for I18N demo ranking method names
    for cmd in ["%s/bin/webaccessadmin -u admin -c -r -D" % CFG_PREFIX,
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 1" % CFG_PREFIX,
                "%s/bin/bibsort -u admin --load-config" % CFG_PREFIX,
                "%s/bin/bibsort 2" % CFG_PREFIX, ]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo site created successfully."

def cli_cmd_load_demo_records(conf):
    """Load demo records.  Useful for testing purposes."""
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    print ">>> Going to load demo records..."
    run_sql("TRUNCATE schTASK")
    # bibsched task pipeline: each tool is first scheduled (-u admin) and
    # then the numbered task is executed immediately:
    for cmd in ["%s/bin/bibupload -u admin -i %s/var/tmp/demobibdata.xml" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/bibupload 1" % CFG_PREFIX,
                "%s/bin/bibdocfile --textify --with-ocr --recid 97" % CFG_PREFIX,
                "%s/bin/bibdocfile --textify --all" % CFG_PREFIX,
                "%s/bin/bibindex -u admin" % CFG_PREFIX,
                "%s/bin/bibindex 2" % CFG_PREFIX,
                "%s/bin/bibreformat -u admin -o HB" % CFG_PREFIX,
                "%s/bin/bibreformat 3" % CFG_PREFIX,
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 4" % CFG_PREFIX,
                "%s/bin/bibrank -u admin" % CFG_PREFIX,
                "%s/bin/bibrank 5" % CFG_PREFIX,
                "%s/bin/bibsort -u admin -R" % CFG_PREFIX,
                "%s/bin/bibsort 6" % CFG_PREFIX,
                "%s/bin/oairepositoryupdater -u admin" % CFG_PREFIX,
                "%s/bin/oairepositoryupdater 7" % CFG_PREFIX,
                "%s/bin/bibupload 8" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo records loaded successfully."
def cli_cmd_remove_demo_records(conf):
    """Remove demo records.  Useful when you are finished testing."""
    print ">>> Going to remove demo records..."
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    # destructive: ask for explicit confirmation first
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your records and documents!"""))
    # remove the fulltext document store on disk, if present:
    if os.path.exists(CFG_PREFIX + os.sep + 'var' + os.sep + 'data'):
        shutil.rmtree(CFG_PREFIX + os.sep + 'var' + os.sep + 'data')
    run_sql("TRUNCATE schTASK")
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabbibclean.sql" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 1" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo records removed successfully."

def cli_cmd_drop_demo_site(conf):
    """Drop demo site completely.  Useful when you are finished testing."""
    print ">>> Going to drop demo site..."
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your site and documents!"""))
    # rebuild from scratch: drop, recreate, then purge demo data:
    cli_cmd_drop_tables(conf)
    cli_cmd_create_tables(conf)
    cli_cmd_remove_demo_records(conf)
    print ">>> Demo site dropped successfully."

def cli_cmd_run_unit_tests(conf):
    """Run unit tests, usually on the working demo site."""
    from invenio.testutils import build_and_run_unit_test_suite
    # propagate suite failure as a non-zero exit code:
    if not build_and_run_unit_test_suite():
        sys.exit(1)

def cli_cmd_run_regression_tests(conf):
    """Run regression tests, usually on the working demo site."""
    from invenio.testutils import build_and_run_regression_test_suite
    if not build_and_run_regression_test_suite():
        sys.exit(1)

def cli_cmd_run_web_tests(conf):
    """Run web tests in a browser.  Requires Firefox with Selenium."""
    from invenio.testutils import build_and_run_web_test_suite
    if not build_and_run_web_test_suite():
        sys.exit(1)

def _detect_ip_address():
    """Detect IP address of this computer.
    Useful for creating Apache vhost conf snippet on RHEL like machines.

    @return: IP address, or '*' if cannot detect
    @rtype: string

    @note: creates socket for real in order to detect real IP address,
    not the loopback one.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        # UDP connect() sends no packet; it only makes the kernel choose
        # the outgoing interface so getsockname() returns the real IP:
        s.connect(('invenio-software.org', 0))
        return s.getsockname()[0]
    except:
        return '*'

def cli_cmd_create_apache_conf(conf):
    """
    Create Apache conf files for this site, keeping previous
    files in a backup copy.
    """
    print ">>> Going to create Apache conf files..."
    from invenio.textutils import wrap_text_in_a_box
    from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO
    apache_conf_dir = conf.get("Invenio", 'CFG_ETCDIR') + \
                      os.sep + 'apache'
    ## Preparation of XSendFile directive
    xsendfile_directive_needed = int(conf.get("Invenio", 'CFG_BIBDOCFILE_USE_XSENDFILE')) != 0
    if xsendfile_directive_needed:
        xsendfile_directive = "XSendFile On\n"
    else:
        xsendfile_directive = "#XSendFile On\n"
    # NOTE(review): the first tuple entry is the line changed by the
    # enclosing patch (renamed from CFG_WEBSUBMIT_FILEDIR to
    # CFG_BIBDOCFILE_FILEDIR); each path becomes an XSendFilePath entry:
    for path in (conf.get('Invenio', 'CFG_BIBDOCFILE_FILEDIR'), # BibDocFile
                 conf.get('Invenio', 'CFG_WEBDIR'),
                 conf.get('Invenio', 'CFG_WEBSUBMIT_STORAGEDIR'), # WebSubmit
                 conf.get('Invenio', 'CFG_TMPDIR'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'tmp', 'attachfile'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'comments'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'baskets', 'comments'),
                 '/tmp'): # BibExport
        if xsendfile_directive_needed:
            xsendfile_directive += ' XSendFilePath %s\n' % path
        else:
            xsendfile_directive += ' #XSendFilePath %s\n' % path
    xsendfile_directive = xsendfile_directive.strip()
    ## Preparation of deflate directive
    deflate_directive_needed = int(conf.get("Invenio", 'CFG_WEBSTYLE_HTTP_USE_COMPRESSION')) != 0
    if deflate_directive_needed:
        # NOTE(review): the source URL inside this snippet appears to have
        # been lost during text extraction — verify against the original file.
        deflate_directive = r"""
## Configuration snippet taken from:
##
SetOutputFilter DEFLATE
# Netscape 4.x has some problems...
BrowserMatch ^Mozilla/4 gzip-only-text/html
# Netscape 4.06-4.08 have some more problems
BrowserMatch ^Mozilla/4\.0[678] no-gzip
# MSIE masquerades as Netscape, but it is fine
# BrowserMatch \bMSIE !no-gzip !gzip-only-text/html
# NOTE: Due to a bug in mod_setenvif up to Apache 2.0.48
# the above regex won't work. You can use the following
# workaround to get the desired effect:
BrowserMatch \bMSI[E] !no-gzip !gzip-only-text/html
# Don't compress images
SetEnvIfNoCase Request_URI \
\.(?:gif|jpe?g|png)$ no-gzip dont-vary
# Make sure proxies don't deliver the wrong content
Header append Vary User-Agent env=!dont-vary
"""
    else:
        deflate_directive = ""
    # Shibboleth SSO snippet is only emitted when SSO auth is configured:
    if CFG_EXTERNAL_AUTH_USING_SSO:
        shibboleth_directive = r"""
SSLRequireSSL   # The modules only work using HTTPS
AuthType shibboleth
ShibRequireSession On
ShibRequireAll On
ShibExportAssertion Off
require valid-user
"""
    else:
        shibboleth_directive = ""
    ## Apache vhost conf file is distro specific, so analyze needs:
    # Gentoo (and generic defaults):
    listen_directive_needed = True
    ssl_pem_directive_needed = False
    ssl_pem_path = '/etc/apache2/ssl/apache.pem'
    ssl_crt_path = '/etc/apache2/ssl/server.crt'
    ssl_key_path = '/etc/apache2/ssl/server.key'
    vhost_ip_address_needed = False
    wsgi_socket_directive_needed = False
    # Debian:
    if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'debian_version'):
        listen_directive_needed = False
        ssl_pem_directive_needed = True
    # RHEL/SLC:
    if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'redhat-release'):
        listen_directive_needed = False
        ssl_crt_path = '/etc/pki/tls/certs/localhost.crt'
        ssl_key_path = '/etc/pki/tls/private/localhost.key'
        vhost_ip_address_needed = True
        wsgi_socket_directive_needed = True
    # maybe we are using non-standard ports?
    vhost_site_url = conf.get('Invenio', 'CFG_SITE_URL').replace("http://", "")
    if vhost_site_url.startswith("https://"):
        ## The installation is configured to require HTTPS for any connection
        vhost_site_url = vhost_site_url.replace("https://", "")
    vhost_site_url_port = '80'
    vhost_site_secure_url = conf.get('Invenio', 'CFG_SITE_SECURE_URL').replace("https://", "")
    vhost_site_secure_url_port = '443'
    # split off explicit :port suffixes, if any:
    if ':' in vhost_site_url:
        vhost_site_url, vhost_site_url_port = vhost_site_url.split(':', 1)
    if ':' in vhost_site_secure_url:
        vhost_site_secure_url, vhost_site_secure_url_port = vhost_site_secure_url.split(':', 1)
    # non-standard ports require explicit Listen directives:
    if vhost_site_url_port != '80' or vhost_site_secure_url_port != '443':
        listen_directive_needed = True
    ## OK, let's create Apache vhost files:
    if not os.path.exists(apache_conf_dir):
        os.mkdir(apache_conf_dir)
    apache_vhost_file = apache_conf_dir + os.sep + \
                        'invenio-apache-vhost.conf'
    apache_vhost_ssl_file = apache_conf_dir + os.sep + \
                            'invenio-apache-vhost-ssl.conf'
    # NOTE(review): the template below appears to have lost its
    # <VirtualHost>/<Directory>/<Files> container tags during text
    # extraction — verify against the original file before relying on it.
    apache_vhost_body = """\
AddDefaultCharset UTF-8
ServerSignature Off
ServerTokens Prod
NameVirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s
%(listen_directive)s
%(wsgi_socket_directive)s
WSGIRestrictStdout Off
deny from all
deny from all
ServerName %(servername)s
ServerAlias %(serveralias)s
ServerAdmin %(serveradmin)s
DocumentRoot %(webdir)s
Options FollowSymLinks MultiViews
AllowOverride None
Order allow,deny
Allow from all
ErrorLog %(logdir)s/apache.err
LogLevel warn
LogFormat "%%h %%l %%u %%t \\"%%r\\" %%>s %%b \\"%%{Referer}i\\" \\"%%{User-agent}i\\" %%D" combined_with_timing
CustomLog %(logdir)s/apache.log combined_with_timing
DirectoryIndex index.en.html index.html
Alias /static/ %(webdir)s/static/
Alias /img/ %(webdir)s/img/
Alias /js/ %(webdir)s/js/
Alias /flash/ %(webdir)s/flash/
Alias /css/ %(webdir)s/css/
Alias /export/ %(webdir)s/export/
Alias /MathJax/ %(webdir)s/MathJax/
Alias /jsCalendar/ %(webdir)s/jsCalendar/
Alias /ckeditor/ %(webdir)s/ckeditor/
Alias /mediaelement/
%(webdir)s/mediaelement/
AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1
Alias /robots.txt %(webdir)s/robots.txt
Alias /favicon.ico %(webdir)s/favicon.ico
WSGIDaemonProcess invenio processes=5 threads=1 display-name=%%{GROUP} inactivity-timeout=3600 maximum-requests=10000
WSGIImportScript %(wsgidir)s/invenio.wsgi process-group=invenio application-group=%%{GLOBAL}
WSGIScriptAlias / %(wsgidir)s/invenio.wsgi
WSGIPassAuthorization On
%(xsendfile_directive)s
WSGIProcessGroup invenio
WSGIApplicationGroup %%{GLOBAL}
Options FollowSymLinks MultiViews
AllowOverride None
Order allow,deny
Allow from all
%(deflate_directive)s
""" % {'vhost_site_url_port': vhost_site_url_port,
       'servername': vhost_site_url,
       'serveralias': vhost_site_url.split('.')[0],
       'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'),
       'webdir': conf.get('Invenio', 'CFG_WEBDIR'),
       'logdir': conf.get('Invenio', 'CFG_LOGDIR'),
       'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'),
       'wsgidir': os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'),
       # on RHEL-like hosts the detected real IP is used, elsewhere '*':
       'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*',
       # directives not needed on this distro are emitted commented out:
       'listen_directive': listen_directive_needed and 'Listen ' + vhost_site_url_port or \
                           '#Listen ' + vhost_site_url_port,
       'wsgi_socket_directive': (wsgi_socket_directive_needed and \
                                'WSGISocketPrefix ' or '#WSGISocketPrefix ') + \
                                conf.get('Invenio', 'CFG_PREFIX') + os.sep + 'var' + os.sep + 'run',
       'xsendfile_directive' : xsendfile_directive,
       'deflate_directive': deflate_directive,
       }
    # HTTPS vhost template (same extraction caveat as the HTTP one):
    apache_vhost_ssl_body = """\
ServerSignature Off
ServerTokens Prod
%(listen_directive)s
NameVirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s
%(ssl_pem_directive)s
%(ssl_crt_directive)s
%(ssl_key_directive)s
WSGIRestrictStdout Off
deny from all
deny from all
ServerName %(servername)s
ServerAlias %(serveralias)s
ServerAdmin %(serveradmin)s
SSLEngine on
DocumentRoot %(webdir)s
Options FollowSymLinks MultiViews
AllowOverride None
Order allow,deny
Allow from all
ErrorLog %(logdir)s/apache-ssl.err
LogLevel warn
LogFormat "%%h %%l %%u %%t \\"%%r\\" %%>s %%b \\"%%{Referer}i\\" \\"%%{User-agent}i\\" %%D" combined_with_timing
CustomLog %(logdir)s/apache-ssl.log combined_with_timing
DirectoryIndex index.en.html index.html
Alias /static/ %(webdir)s/static/
Alias /img/ %(webdir)s/img/
Alias /js/ %(webdir)s/js/
Alias /flash/ %(webdir)s/flash/
Alias /css/ %(webdir)s/css/
Alias /export/ %(webdir)s/export/
Alias /MathJax/ %(webdir)s/MathJax/
Alias /jsCalendar/ %(webdir)s/jsCalendar/
Alias /ckeditor/ %(webdir)s/ckeditor/
Alias /mediaelement/ %(webdir)s/mediaelement/
AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1
Alias /robots.txt %(webdir)s/robots.txt
Alias /favicon.ico %(webdir)s/favicon.ico
RedirectMatch /sslredirect/(.*) http://$1
WSGIScriptAlias / %(wsgidir)s/invenio.wsgi
WSGIPassAuthorization On
%(xsendfile_directive)s
WSGIProcessGroup invenio
WSGIApplicationGroup %%{GLOBAL}
Options FollowSymLinks MultiViews
AllowOverride None
Order allow,deny
Allow from all
%(deflate_directive)s
%(shibboleth_directive)s
""" % {'vhost_site_secure_url_port': vhost_site_secure_url_port,
       'servername': vhost_site_secure_url,
       'serveralias': vhost_site_secure_url.split('.')[0],
       'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'),
       'webdir': conf.get('Invenio', 'CFG_WEBDIR'),
       'logdir': conf.get('Invenio', 'CFG_LOGDIR'),
       'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'),
       'wsgidir' : os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'),
       'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*',
       'listen_directive' : listen_directive_needed and 'Listen ' + vhost_site_secure_url_port or \
                            '#Listen ' + vhost_site_secure_url_port,
       # Debian uses a combined PEM file; other distros separate crt/key,
       # so exactly one of the pem vs crt directives is left uncommented:
       'ssl_pem_directive': ssl_pem_directive_needed and \
                            'SSLCertificateFile %s' % ssl_pem_path or \
                            '#SSLCertificateFile %s' % ssl_pem_path,
       'ssl_crt_directive': ssl_pem_directive_needed and \
                            '#SSLCertificateFile %s' % ssl_crt_path or \
                            'SSLCertificateFile %s' % ssl_crt_path,
       'ssl_key_directive': ssl_pem_directive_needed and \
                            '#SSLCertificateKeyFile %s' % ssl_key_path or \
                            'SSLCertificateKeyFile %s' % ssl_key_path,
       'xsendfile_directive' : xsendfile_directive,
       'deflate_directive': deflate_directive,
       'shibboleth_directive': shibboleth_directive,
       }
    # write HTTP vhost snippet, backing up any previous one:
    if os.path.exists(apache_vhost_file):
        shutil.copy(apache_vhost_file,
                    apache_vhost_file + '.OLD')
    fdesc = open(apache_vhost_file, 'w')
    fdesc.write(apache_vhost_body)
    fdesc.close()
    print
    print "Created file", apache_vhost_file
    # write HTTPS vhost snippet only when a secure URL is configured:
    vhost_ssl_created = False
    if conf.get('Invenio', 'CFG_SITE_SECURE_URL').startswith("https://"):
        if os.path.exists(apache_vhost_ssl_file):
            shutil.copy(apache_vhost_ssl_file,
                        apache_vhost_ssl_file + '.OLD')
        fdesc = open(apache_vhost_ssl_file, 'w')
        fdesc.write(apache_vhost_ssl_body)
        fdesc.close()
        vhost_ssl_created = True
        print "Created file", apache_vhost_ssl_file
    print wrap_text_in_a_box("""\
Apache virtual host configuration file(s) for your Invenio site was(were) created.  Please check created file(s) and activate virtual host(s).  For example, you can put the following include statements in your httpd.conf:\n
Include %s
%s
Please see the INSTALL file for more details. """ % (apache_vhost_file,
    (vhost_ssl_created and 'Include ' or '#Include ') + apache_vhost_ssl_file))
    print ">>> Apache conf files created."

def cli_cmd_get(conf, varname):
    """
    Return value of VARNAME read from CONF files.  Useful for
    third-party programs to access values of conf options such as
    CFG_PREFIX.  Return None if VARNAME is not found.
    """
    # do not pay attention to upper/lower case:
    varname = varname.lower()
    # do not pay attention to section names yet:
    all_options = {}
    for section in conf.sections():
        for option in conf.options(section):
            all_options[option] = conf.get(section, option)
    return all_options.get(varname, None)

def cli_cmd_list(conf):
    """
    Print a list of all conf options and values from CONF.
""" sections = conf.sections() sections.sort() for section in sections: options = conf.options(section) options.sort() for option in options: print option.upper(), '=', conf.get(section, option) def _grep_version_from_executable(path_to_exec, version_regexp): """ Try to detect a program version by digging into its binary PATH_TO_EXEC and looking for VERSION_REGEXP. Return program version as a string. Return empty string if not succeeded. """ from invenio.shellutils import run_shell_command exec_version = "" if os.path.exists(path_to_exec): dummy1, cmd2_out, dummy2 = run_shell_command("strings %s | grep %s", (path_to_exec, version_regexp)) if cmd2_out: for cmd2_out_line in cmd2_out.split("\n"): if len(cmd2_out_line) > len(exec_version): # the longest the better exec_version = cmd2_out_line return exec_version def detect_apache_version(): """ Try to detect Apache version by localizing httpd or apache executables and grepping inside binaries. Return list of all found Apache versions and paths. (For a given executable, the returned format is 'apache_version [apache_path]'.) Return empty list if no success. """ from invenio.shellutils import run_shell_command out = [] dummy1, cmd_out, dummy2 = run_shell_command("locate bin/httpd bin/apache") for apache in cmd_out.split("\n"): apache_version = _grep_version_from_executable(apache, '^Apache\/') if apache_version: out.append("%s [%s]" % (apache_version, apache)) return out def cli_cmd_detect_system_details(conf): """ Detect and print system details such as Apache/Python/MySQL versions etc. Useful for debugging problems on various OS. """ import MySQLdb print ">>> Going to detect system details..." 
print "* Hostname: " + socket.gethostname() print "* Invenio version: " + conf.get("Invenio", "CFG_VERSION") print "* Python version: " + sys.version.replace("\n", " ") print "* Apache version: " + ";\n ".join(detect_apache_version()) print "* MySQLdb version: " + MySQLdb.__version__ try: from invenio.dbquery import run_sql print "* MySQL version:" for key, val in run_sql("SHOW VARIABLES LIKE 'version%'") + \ run_sql("SHOW VARIABLES LIKE 'charact%'") + \ run_sql("SHOW VARIABLES LIKE 'collat%'"): if False: print " - %s: %s" % (key, val) elif key in ['version', 'character_set_client', 'character_set_connection', 'character_set_database', 'character_set_results', 'character_set_server', 'character_set_system', 'collation_connection', 'collation_database', 'collation_server']: print " - %s: %s" % (key, val) except ImportError: print "* ERROR: cannot import dbquery" print ">>> System details detected successfully." def main(): """Main entry point.""" conf = ConfigParser() if '--help' in sys.argv or \ '-h' in sys.argv: print_usage() elif '--version' in sys.argv or \ '-V' in sys.argv: print_version() else: confdir = None if '--conf-dir' in sys.argv: try: confdir = sys.argv[sys.argv.index('--conf-dir') + 1] except IndexError: pass # missing --conf-dir argument value if not os.path.exists(confdir): print "ERROR: bad or missing --conf-dir option value." 
                sys.exit(1)
        else:
            ## try to detect path to conf dir (relative to this bin dir):
            confdir = re.sub(r'/bin$', '/etc', sys.path[0])
        ## read conf files:
        for conffile in [confdir + os.sep + 'invenio.conf',
                         confdir + os.sep + 'invenio-autotools.conf',
                         confdir + os.sep + 'invenio-local.conf',]:
            if os.path.exists(conffile):
                conf.read(conffile)
            else:
                if not conffile.endswith("invenio-local.conf"):
                    # invenio-local.conf is optional, otherwise stop
                    print "ERROR: Badly guessed conf file location", conffile
                    print "(Please use --conf-dir option.)"
                    sys.exit(1)
        ## decide what to do: scan argv and dispatch each recognized option
        ## to its cli_cmd_* handler; `done` tracks whether any command ran.
        done = False
        for opt_idx in range(0, len(sys.argv)):
            opt = sys.argv[opt_idx]
            if opt == '--conf-dir':
                # already treated before, so skip silently:
                pass
            elif opt == '--get':
                try:
                    varname = sys.argv[opt_idx + 1]
                except IndexError:
                    print "ERROR: bad or missing --get option value."
                    sys.exit(1)
                # a following option token is not a valid variable name:
                if varname.startswith('-'):
                    print "ERROR: bad or missing --get option value."
                    sys.exit(1)
                varvalue = cli_cmd_get(conf, varname)
                if varvalue is not None:
                    print varvalue
                else:
                    sys.exit(1)
                done = True
            elif opt == '--list':
                cli_cmd_list(conf)
                done = True
            elif opt == '--detect-system-details':
                cli_cmd_detect_system_details(conf)
                done = True
            elif opt == '--create-tables':
                cli_cmd_create_tables(conf)
                done = True
            elif opt == '--load-webstat-conf':
                cli_cmd_load_webstat_conf(conf)
                done = True
            elif opt == '--drop-tables':
                cli_cmd_drop_tables(conf)
                done = True
            elif opt == '--check-openoffice':
                cli_check_openoffice(conf)
                done = True
            elif opt == '--create-demo-site':
                cli_cmd_create_demo_site(conf)
                done = True
            elif opt == '--load-demo-records':
                cli_cmd_load_demo_records(conf)
                done = True
            elif opt == '--remove-demo-records':
                cli_cmd_remove_demo_records(conf)
                done = True
            elif opt == '--drop-demo-site':
                cli_cmd_drop_demo_site(conf)
                done = True
            elif opt == '--run-unit-tests':
                cli_cmd_run_unit_tests(conf)
                done = True
            elif opt == '--run-regression-tests':
                cli_cmd_run_regression_tests(conf)
                done = True
            elif opt == '--run-web-tests':
                cli_cmd_run_web_tests(conf)
                done = True
            elif opt == '--update-all':
                # run the full set of update commands in one go:
                cli_cmd_update_config_py(conf)
                cli_cmd_update_dbquery_py(conf)
                cli_cmd_update_dbexec(conf)
                cli_cmd_update_bibconvert_tpl(conf)
                cli_cmd_update_web_tests(conf)
                done = True
            elif opt == '--update-config-py':
                cli_cmd_update_config_py(conf)
                done = True
            elif opt == '--update-dbquery-py':
                cli_cmd_update_dbquery_py(conf)
                done = True
            elif opt == '--update-dbexec':
                cli_cmd_update_dbexec(conf)
                done = True
            elif opt == '--update-bibconvert-tpl':
                cli_cmd_update_bibconvert_tpl(conf)
                done = True
            elif opt == '--update-web-tests':
                cli_cmd_update_web_tests(conf)
                done = True
            elif opt == '--reset-all':
                # run the full set of reset commands in one go:
                cli_cmd_reset_sitename(conf)
                cli_cmd_reset_siteadminemail(conf)
                cli_cmd_reset_fieldnames(conf)
                cli_cmd_reset_recstruct_cache(conf)
                done = True
            elif opt == '--reset-sitename':
                cli_cmd_reset_sitename(conf)
                done = True
            elif opt == '--reset-siteadminemail':
                cli_cmd_reset_siteadminemail(conf)
                done = True
            elif opt == '--reset-fieldnames':
                cli_cmd_reset_fieldnames(conf)
                done = True
            elif opt == '--reset-recstruct-cache':
                cli_cmd_reset_recstruct_cache(conf)
                done = True
            elif opt == '--create-apache-conf':
                cli_cmd_create_apache_conf(conf)
                done = True
            elif opt.startswith("-") and opt != '--yes-i-know':
                print "ERROR: unknown option", opt
                sys.exit(1)
        if not done:
            print """ERROR: Please specify a command.  Please see '--help'."""
            sys.exit(1)

if __name__ == '__main__':
    main()
diff --git a/modules/miscutil/sql/tabfill.sql b/modules/miscutil/sql/tabfill.sql
index 8c4250687..c281532b7 100644
--- a/modules/miscutil/sql/tabfill.sql
+++ b/modules/miscutil/sql/tabfill.sql
@@ -1,659 +1,659 @@
-- This file is part of Invenio.
-- Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN.
--
-- Invenio is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License as
-- published by the Free Software Foundation; either version 2 of the
-- License, or (at your option) any later version.
--
-- Invenio is distributed in the hope that it will be useful, but
-- WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-- General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with Invenio; if not, write to the Free Software Foundation, Inc.,
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

-- Fill Invenio configuration tables with defaults suitable for any site.

-- Default ranking method ("wrd" = word similarity), attached to the
-- root collection (id=1) with score 100:
INSERT INTO rnkMETHOD (id,name,last_updated) VALUES (1,'wrd','0000-00-00 00:00:00');
INSERT INTO collection_rnkMETHOD (id_collection,id_rnkMETHOD,score) VALUES (1,1,100);

-- Placeholder rows for the citation-ranking dictionaries (payload NULL
-- until computed by the ranking tasks):
INSERT INTO rnkCITATIONDATA VALUES (1,'citationdict',NULL,'0000-00-00');
INSERT INTO rnkCITATIONDATA VALUES (2,'reversedict',NULL,'0000-00-00');
INSERT INTO rnkCITATIONDATA VALUES (3,'selfcitdict',NULL,'0000-00-00');
INSERT INTO rnkCITATIONDATA VALUES (4,'selfcitedbydict',NULL,'0000-00-00');

-- Logical search fields: (id, visible name, search code):
INSERT INTO field VALUES (1,'any field','anyfield');
INSERT INTO field VALUES (2,'title','title');
INSERT INTO field VALUES (3,'author','author');
INSERT INTO field VALUES (4,'abstract','abstract');
INSERT INTO field VALUES (5,'keyword','keyword');
INSERT INTO field VALUES (6,'report number','reportnumber');
INSERT INTO field VALUES (7,'subject','subject');
INSERT INTO field VALUES (8,'reference','reference');
INSERT INTO field VALUES (9,'fulltext','fulltext');
INSERT INTO field VALUES (10,'collection','collection');
INSERT INTO field VALUES (11,'division','division');
INSERT INTO field VALUES (12,'year','year');
INSERT INTO field VALUES (13,'experiment','experiment');
INSERT INTO field VALUES (14,'record ID','recid');
INSERT INTO field VALUES (15,'isbn','isbn');
INSERT INTO field VALUES (16,'issn','issn');
INSERT INTO field VALUES (17,'coden','coden');
-- id 18 ('doi') is deliberately commented out:
-- INSERT INTO field VALUES (18,'doi','doi');
INSERT INTO field VALUES (19,'journal','journal');
INSERT INTO field VALUES
(20,'collaboration','collaboration'); INSERT INTO field VALUES (21,'affiliation','affiliation'); INSERT INTO field VALUES (22,'exact author','exactauthor'); INSERT INTO field VALUES (23,'date created','datecreated'); INSERT INTO field VALUES (24,'date modified','datemodified'); INSERT INTO field VALUES (25,'refers to','refersto'); INSERT INTO field VALUES (26,'cited by','citedby'); INSERT INTO field VALUES (27,'caption','caption'); INSERT INTO field VALUES (28,'first author','firstauthor'); INSERT INTO field VALUES (29,'exact first author','exactfirstauthor'); INSERT INTO field VALUES (30,'author count','authorcount'); INSERT INTO field_tag VALUES (1,100,10); INSERT INTO field_tag VALUES (1,102,10); INSERT INTO field_tag VALUES (1,103,10); INSERT INTO field_tag VALUES (1,104,10); INSERT INTO field_tag VALUES (1,105,10); INSERT INTO field_tag VALUES (1,106,10); INSERT INTO field_tag VALUES (1,107,10); INSERT INTO field_tag VALUES (1,108,10); INSERT INTO field_tag VALUES (1,109,10); INSERT INTO field_tag VALUES (1,110,10); INSERT INTO field_tag VALUES (1,111,10); INSERT INTO field_tag VALUES (1,112,10); INSERT INTO field_tag VALUES (1,113,10); INSERT INTO field_tag VALUES (1,114,10); INSERT INTO field_tag VALUES (1,16,10); INSERT INTO field_tag VALUES (1,17,10); INSERT INTO field_tag VALUES (1,18,10); INSERT INTO field_tag VALUES (1,19,10); INSERT INTO field_tag VALUES (1,20,10); INSERT INTO field_tag VALUES (1,21,10); INSERT INTO field_tag VALUES (1,22,10); INSERT INTO field_tag VALUES (1,23,10); INSERT INTO field_tag VALUES (1,24,10); INSERT INTO field_tag VALUES (1,25,10); INSERT INTO field_tag VALUES (1,26,10); INSERT INTO field_tag VALUES (1,27,10); INSERT INTO field_tag VALUES (1,28,10); INSERT INTO field_tag VALUES (1,29,10); INSERT INTO field_tag VALUES (1,30,10); INSERT INTO field_tag VALUES (1,31,10); INSERT INTO field_tag VALUES (1,32,10); INSERT INTO field_tag VALUES (1,33,10); INSERT INTO field_tag VALUES (1,34,10); INSERT INTO field_tag VALUES 
(1,35,10); INSERT INTO field_tag VALUES (1,36,10); INSERT INTO field_tag VALUES (1,37,10); INSERT INTO field_tag VALUES (1,38,10); INSERT INTO field_tag VALUES (1,39,10); INSERT INTO field_tag VALUES (1,40,10); INSERT INTO field_tag VALUES (1,41,10); INSERT INTO field_tag VALUES (1,42,10); INSERT INTO field_tag VALUES (1,43,10); INSERT INTO field_tag VALUES (1,44,10); INSERT INTO field_tag VALUES (1,45,10); INSERT INTO field_tag VALUES (1,46,10); INSERT INTO field_tag VALUES (1,47,10); INSERT INTO field_tag VALUES (1,48,10); INSERT INTO field_tag VALUES (1,49,10); INSERT INTO field_tag VALUES (1,50,10); INSERT INTO field_tag VALUES (1,51,10); INSERT INTO field_tag VALUES (1,52,10); INSERT INTO field_tag VALUES (1,53,10); INSERT INTO field_tag VALUES (1,54,10); INSERT INTO field_tag VALUES (1,55,10); INSERT INTO field_tag VALUES (1,56,10); INSERT INTO field_tag VALUES (1,57,10); INSERT INTO field_tag VALUES (1,58,10); INSERT INTO field_tag VALUES (1,59,10); INSERT INTO field_tag VALUES (1,60,10); INSERT INTO field_tag VALUES (1,61,10); INSERT INTO field_tag VALUES (1,62,10); INSERT INTO field_tag VALUES (1,63,10); INSERT INTO field_tag VALUES (1,64,10); INSERT INTO field_tag VALUES (1,65,10); INSERT INTO field_tag VALUES (1,66,10); INSERT INTO field_tag VALUES (1,67,10); INSERT INTO field_tag VALUES (1,68,10); INSERT INTO field_tag VALUES (1,69,10); INSERT INTO field_tag VALUES (1,70,10); INSERT INTO field_tag VALUES (1,71,10); INSERT INTO field_tag VALUES (1,72,10); INSERT INTO field_tag VALUES (1,73,10); INSERT INTO field_tag VALUES (1,74,10); INSERT INTO field_tag VALUES (1,75,10); INSERT INTO field_tag VALUES (1,76,10); INSERT INTO field_tag VALUES (1,77,10); INSERT INTO field_tag VALUES (1,78,10); INSERT INTO field_tag VALUES (1,79,10); INSERT INTO field_tag VALUES (1,80,10); INSERT INTO field_tag VALUES (1,81,10); INSERT INTO field_tag VALUES (1,82,10); INSERT INTO field_tag VALUES (1,83,10); INSERT INTO field_tag VALUES (1,84,10); INSERT INTO field_tag VALUES 
(1,85,10); INSERT INTO field_tag VALUES (1,86,10); INSERT INTO field_tag VALUES (1,87,10); INSERT INTO field_tag VALUES (1,88,10); INSERT INTO field_tag VALUES (1,89,10); INSERT INTO field_tag VALUES (1,90,10); INSERT INTO field_tag VALUES (1,91,10); INSERT INTO field_tag VALUES (1,92,10); INSERT INTO field_tag VALUES (1,93,10); INSERT INTO field_tag VALUES (1,94,10); INSERT INTO field_tag VALUES (1,95,10); INSERT INTO field_tag VALUES (1,96,10); INSERT INTO field_tag VALUES (1,97,10); INSERT INTO field_tag VALUES (1,98,10); INSERT INTO field_tag VALUES (1,99,10); INSERT INTO field_tag VALUES (1,122,10); INSERT INTO field_tag VALUES (1,123,10); INSERT INTO field_tag VALUES (1,124,10); INSERT INTO field_tag VALUES (1,125,10); INSERT INTO field_tag VALUES (1,126,10); INSERT INTO field_tag VALUES (1,127,10); INSERT INTO field_tag VALUES (1,128,10); INSERT INTO field_tag VALUES (1,129,10); INSERT INTO field_tag VALUES (1,130,10); INSERT INTO field_tag VALUES (10,11,100); INSERT INTO field_tag VALUES (11,14,100); INSERT INTO field_tag VALUES (12,15,10); INSERT INTO field_tag VALUES (13,116,10); INSERT INTO field_tag VALUES (2,3,100); INSERT INTO field_tag VALUES (2,4,90); INSERT INTO field_tag VALUES (3,1,100); INSERT INTO field_tag VALUES (3,2,90); INSERT INTO field_tag VALUES (4,5,100); INSERT INTO field_tag VALUES (5,6,100); INSERT INTO field_tag VALUES (6,7,30); INSERT INTO field_tag VALUES (6,8,10); INSERT INTO field_tag VALUES (6,9,20); INSERT INTO field_tag VALUES (7,12,100); INSERT INTO field_tag VALUES (7,13,90); INSERT INTO field_tag VALUES (8,10,100); INSERT INTO field_tag VALUES (9,115,100); INSERT INTO field_tag VALUES (14,117,100); INSERT INTO field_tag VALUES (15,118,100); INSERT INTO field_tag VALUES (16,119,100); INSERT INTO field_tag VALUES (17,120,100); -- INSERT INTO field_tag VALUES (18,121,100); INSERT INTO field_tag VALUES (19,131,100); INSERT INTO field_tag VALUES (20,132,100); INSERT INTO field_tag VALUES (21,133,100); INSERT INTO field_tag 
VALUES (21,134,90); INSERT INTO field_tag VALUES (22,1,100); INSERT INTO field_tag VALUES (22,2,90); INSERT INTO field_tag VALUES (27,135,100); INSERT INTO field_tag VALUES (28,1,100); INSERT INTO field_tag VALUES (29,1,100); INSERT INTO field_tag VALUES (30,1,100); INSERT INTO field_tag VALUES (30,2,90); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (1,'HTML brief','hb', 'HTML brief output format, used for search results pages.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (2,'HTML detailed','hd', 'HTML detailed output format, used for Detailed record pages.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (3,'MARC','hm', 'HTML MARC.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (4,'Dublin Core','xd', 'XML Dublin Core.', 'text/xml', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (5,'MARCXML','xm', 'XML MARC.', 'text/xml', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (6,'portfolio','hp', 'HTML portfolio-style output format for photos.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (7,'photo captions only','hc', 'HTML caption-only output format for photos.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (8,'BibTeX','hx', 'BibTeX.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (9,'EndNote','xe', 'XML EndNote.', 'text/xml', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (10,'NLM','xn', 'XML NLM.', 'text/xml', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (11,'Excel','excel', 'Excel csv output', 'application/ms-excel', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (12,'HTML 
similarity','hs', 'Very short HTML output for similarity box (people also viewed..).', 'text/html', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (13,'RSS','xr', 'RSS.', 'text/xml', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (14,'OAI DC','xoaidc', 'OAI DC.', 'text/xml', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (15,'File mini-panel', 'hdfile', 'Used to show fulltext files in mini-panel of detailed record pages.', 'text/html', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (16,'Actions mini-panel', 'hdact', 'Used to display actions in mini-panel of detailed record pages.', 'text/html', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (17,'References tab', 'hdref', 'Display record references in References tab.', 'text/html', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (18,'HTML citesummary','hcs', 'HTML cite summary format, used for search results pages.', 'text/html', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (19,'RefWorks','xw', 'RefWorks.', 'text/xml', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (20,'MODS', 'xo', 'Metadata Object Description Schema', 'application/xml', 1); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (21,'HTML author claiming', 'ha', 'Very brief HTML output format for author/paper claiming facility.', 'text/html', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (22,'Podcast', 'xp', 'Sample format suitable for multimedia feeds, such as podcasts', 'application/rss+xml', 0); INSERT INTO format (id,name,code,description,content_type,visibility) VALUES (23,'WebAuthorProfile affiliations helper','wapaff', 'cPickled dicts', 'text', 0); INSERT INTO format (id,name,code,description,content_type,visibility) 
VALUES (24,'EndNote (8-X)','xe8x', 'XML EndNote (8-X).', 'text/xml', 1); INSERT INTO tag VALUES (1,'first author name','100__a'); INSERT INTO tag VALUES (2,'additional author name','700__a'); INSERT INTO tag VALUES (3,'main title','245__%'); INSERT INTO tag VALUES (4,'additional title','246__%'); INSERT INTO tag VALUES (5,'abstract','520__%'); INSERT INTO tag VALUES (6,'keyword','6531_a'); INSERT INTO tag VALUES (7,'primary report number','037__a'); INSERT INTO tag VALUES (8,'additional report number','088__a'); INSERT INTO tag VALUES (9,'added report number','909C0r'); INSERT INTO tag VALUES (10,'reference','999C5%'); INSERT INTO tag VALUES (11,'collection identifier','980__%'); INSERT INTO tag VALUES (12,'main subject','65017a'); INSERT INTO tag VALUES (13,'additional subject','65027a'); INSERT INTO tag VALUES (14,'division','909C0p'); INSERT INTO tag VALUES (15,'year','909C0y'); INSERT INTO tag VALUES (16,'00x','00%'); INSERT INTO tag VALUES (17,'01x','01%'); INSERT INTO tag VALUES (18,'02x','02%'); INSERT INTO tag VALUES (19,'03x','03%'); INSERT INTO tag VALUES (20,'lang','04%'); INSERT INTO tag VALUES (21,'05x','05%'); INSERT INTO tag VALUES (22,'06x','06%'); INSERT INTO tag VALUES (23,'07x','07%'); INSERT INTO tag VALUES (24,'08x','08%'); INSERT INTO tag VALUES (25,'09x','09%'); INSERT INTO tag VALUES (26,'10x','10%'); INSERT INTO tag VALUES (27,'11x','11%'); INSERT INTO tag VALUES (28,'12x','12%'); INSERT INTO tag VALUES (29,'13x','13%'); INSERT INTO tag VALUES (30,'14x','14%'); INSERT INTO tag VALUES (31,'15x','15%'); INSERT INTO tag VALUES (32,'16x','16%'); INSERT INTO tag VALUES (33,'17x','17%'); INSERT INTO tag VALUES (34,'18x','18%'); INSERT INTO tag VALUES (35,'19x','19%'); INSERT INTO tag VALUES (36,'20x','20%'); INSERT INTO tag VALUES (37,'21x','21%'); INSERT INTO tag VALUES (38,'22x','22%'); INSERT INTO tag VALUES (39,'23x','23%'); INSERT INTO tag VALUES (40,'24x','24%'); INSERT INTO tag VALUES (41,'25x','25%'); INSERT INTO tag VALUES 
(42,'internal','26%'); INSERT INTO tag VALUES (43,'27x','27%'); INSERT INTO tag VALUES (44,'28x','28%'); INSERT INTO tag VALUES (45,'29x','29%'); INSERT INTO tag VALUES (46,'pages','30%'); INSERT INTO tag VALUES (47,'31x','31%'); INSERT INTO tag VALUES (48,'32x','32%'); INSERT INTO tag VALUES (49,'33x','33%'); INSERT INTO tag VALUES (50,'34x','34%'); INSERT INTO tag VALUES (51,'35x','35%'); INSERT INTO tag VALUES (52,'36x','36%'); INSERT INTO tag VALUES (53,'37x','37%'); INSERT INTO tag VALUES (54,'38x','38%'); INSERT INTO tag VALUES (55,'39x','39%'); INSERT INTO tag VALUES (56,'40x','40%'); INSERT INTO tag VALUES (57,'41x','41%'); INSERT INTO tag VALUES (58,'42x','42%'); INSERT INTO tag VALUES (59,'43x','43%'); INSERT INTO tag VALUES (60,'44x','44%'); INSERT INTO tag VALUES (61,'45x','45%'); INSERT INTO tag VALUES (62,'46x','46%'); INSERT INTO tag VALUES (63,'47x','47%'); INSERT INTO tag VALUES (64,'48x','48%'); INSERT INTO tag VALUES (65,'series','49%'); INSERT INTO tag VALUES (66,'50x','50%'); INSERT INTO tag VALUES (67,'51x','51%'); INSERT INTO tag VALUES (68,'52x','52%'); INSERT INTO tag VALUES (69,'53x','53%'); INSERT INTO tag VALUES (70,'54x','54%'); INSERT INTO tag VALUES (71,'55x','55%'); INSERT INTO tag VALUES (72,'56x','56%'); INSERT INTO tag VALUES (73,'57x','57%'); INSERT INTO tag VALUES (74,'58x','58%'); INSERT INTO tag VALUES (75,'summary','59%'); INSERT INTO tag VALUES (76,'60x','60%'); INSERT INTO tag VALUES (77,'61x','61%'); INSERT INTO tag VALUES (78,'62x','62%'); INSERT INTO tag VALUES (79,'63x','63%'); INSERT INTO tag VALUES (80,'64x','64%'); INSERT INTO tag VALUES (81,'65x','65%'); INSERT INTO tag VALUES (82,'66x','66%'); INSERT INTO tag VALUES (83,'67x','67%'); INSERT INTO tag VALUES (84,'68x','68%'); INSERT INTO tag VALUES (85,'subject','69%'); INSERT INTO tag VALUES (86,'70x','70%'); INSERT INTO tag VALUES (87,'71x','71%'); INSERT INTO tag VALUES (88,'author-ad','72%'); INSERT INTO tag VALUES (89,'73x','73%'); INSERT INTO tag VALUES 
(90,'74x','74%'); INSERT INTO tag VALUES (91,'75x','75%'); INSERT INTO tag VALUES (92,'76x','76%'); INSERT INTO tag VALUES (93,'77x','77%'); INSERT INTO tag VALUES (94,'78x','78%'); INSERT INTO tag VALUES (95,'79x','79%'); INSERT INTO tag VALUES (96,'80x','80%'); INSERT INTO tag VALUES (97,'81x','81%'); INSERT INTO tag VALUES (98,'82x','82%'); INSERT INTO tag VALUES (99,'83x','83%'); INSERT INTO tag VALUES (100,'84x','84%'); INSERT INTO tag VALUES (101,'electr','85%'); INSERT INTO tag VALUES (102,'86x','86%'); INSERT INTO tag VALUES (103,'87x','87%'); INSERT INTO tag VALUES (104,'88x','88%'); INSERT INTO tag VALUES (105,'89x','89%'); INSERT INTO tag VALUES (106,'publication','90%'); INSERT INTO tag VALUES (107,'pub-conf-cit','91%'); INSERT INTO tag VALUES (108,'92x','92%'); INSERT INTO tag VALUES (109,'93x','93%'); INSERT INTO tag VALUES (110,'94x','94%'); INSERT INTO tag VALUES (111,'95x','95%'); INSERT INTO tag VALUES (112,'catinfo','96%'); INSERT INTO tag VALUES (113,'97x','97%'); INSERT INTO tag VALUES (114,'98x','98%'); INSERT INTO tag VALUES (115,'url','8564_u'); INSERT INTO tag VALUES (116,'experiment','909C0e'); INSERT INTO tag VALUES (117,'record ID','001'); INSERT INTO tag VALUES (118,'isbn','020__a'); INSERT INTO tag VALUES (119,'issn','022__a'); INSERT INTO tag VALUES (120,'coden','030__a'); INSERT INTO tag VALUES (121,'doi','909C4a'); INSERT INTO tag VALUES (122,'850x','850%'); INSERT INTO tag VALUES (123,'851x','851%'); INSERT INTO tag VALUES (124,'852x','852%'); INSERT INTO tag VALUES (125,'853x','853%'); INSERT INTO tag VALUES (126,'854x','854%'); INSERT INTO tag VALUES (127,'855x','855%'); INSERT INTO tag VALUES (128,'857x','857%'); INSERT INTO tag VALUES (129,'858x','858%'); INSERT INTO tag VALUES (130,'859x','859%'); INSERT INTO tag VALUES (131,'journal','909C4%'); INSERT INTO tag VALUES (132,'collaboration','710__g'); INSERT INTO tag VALUES (133,'first author affiliation','100__u'); INSERT INTO tag VALUES (134,'additional author 
affiliation','700__u'); INSERT INTO tag VALUES (135,'caption','8564_y'); INSERT INTO tag VALUES (136,'journal page','909C4c'); INSERT INTO tag VALUES (137,'journal title','909C4p'); INSERT INTO tag VALUES (138,'journal volume','909C4v'); INSERT INTO tag VALUES (139,'journal year','909C4y'); INSERT INTO tag VALUES (140,'comment','500__a'); INSERT INTO tag VALUES (141,'title','245__a'); INSERT INTO tag VALUES (142,'main abstract','245__a'); INSERT INTO tag VALUES (143,'internal notes','595__a'); INSERT INTO tag VALUES (144,'other relationship entry', '787%'); INSERT INTO idxINDEX VALUES (1,'global','This index contains words/phrases from global fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (2,'collection','This index contains words/phrases from collection identifiers fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (3,'abstract','This index contains words/phrases from abstract fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (4,'author','This index contains fuzzy words/phrases from author fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (5,'keyword','This index contains words/phrases from keyword fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (6,'reference','This index contains words/phrases from references fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (7,'reportnumber','This index contains words/phrases from report numbers fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (8,'title','This index contains words/phrases from title fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (9,'fulltext','This index contains words/phrases from fulltext fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (10,'year','This index contains words/phrases from year fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (11,'journal','This index contains words/phrases from journal publication information fields.','0000-00-00 
00:00:00', ''); INSERT INTO idxINDEX VALUES (12,'collaboration','This index contains words/phrases from collaboration name fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (13,'affiliation','This index contains words/phrases from institutional affiliation fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (14,'exactauthor','This index contains exact words/phrases from author fields.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (15,'caption','This index contains exact words/phrases from figure captions.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (16,'firstauthor','This index contains fuzzy words/phrases from first author field.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (17,'exactfirstauthor','This index contains exact words/phrases from first author field.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX VALUES (18,'authorcount','This index contains number of authors of the record.','0000-00-00 00:00:00', ''); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (1,1); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (2,10); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (3,4); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (4,3); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (5,5); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (6,8); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (7,6); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (8,2); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (9,9); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (10,12); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (11,19); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (12,20); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (13,21); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (14,22); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (15,27); INSERT INTO 
idxINDEX_field (id_idxINDEX, id_field) VALUES (16,28); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (17,29); INSERT INTO idxINDEX_field (id_idxINDEX, id_field) VALUES (18,30); INSERT INTO sbmACTION VALUES ('Submit New Record','SBI','running','1998-08-17','2001-08-08','','Submit New Record'); INSERT INTO sbmACTION VALUES ('Modify Record','MBI','modify','1998-08-17','2001-11-07','','Modify Record'); INSERT INTO sbmACTION VALUES ('Submit New File','SRV','revise','0000-00-00','2001-11-07','','Submit New File'); INSERT INTO sbmACTION VALUES ('Approve Record','APP','approve','2001-11-08','2002-06-11','','Approve Record'); INSERT INTO sbmALLFUNCDESCR VALUES ('Ask_For_Record_Details_Confirmation',''); INSERT INTO sbmALLFUNCDESCR VALUES ('CaseEDS',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Modify_Interface',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Recid',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Finish_Submission',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Info',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Recid', 'This function gets the recid for a document with a given report-number (as stored in the global variable rn).'); INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Report_Number',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Sysno',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Insert_Modify_Record',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Insert_Record',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Is_Original_Submitter',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Is_Referee','This function checks whether the logged user is a referee for the current document'); INSERT INTO sbmALLFUNCDESCR VALUES ('Mail_Approval_Request_to_Referee',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Mail_Approval_Withdrawn_to_Referee',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Mail_Submitter',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Make_Modify_Record',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Make_Record',''); INSERT INTO sbmALLFUNCDESCR VALUES 
('Move_From_Pending',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Move_to_Done',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Move_to_Pending',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Print_Success',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Print_Success_Approval_Request',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Print_Success_APP',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Print_Success_DEL','Prepare a message for the user informing them that their record was successfully deleted.'); INSERT INTO sbmALLFUNCDESCR VALUES ('Print_Success_MBI',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Print_Success_SRV',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Register_Approval_Request',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Register_Referee_Decision',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Withdraw_Approval_Request',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Report_Number_Generation',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Second_Report_Number_Generation','Generate a secondary report number for a document.'); INSERT INTO sbmALLFUNCDESCR VALUES ('Send_Approval_Request',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Send_APP_Mail',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Send_Delete_Mail',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Send_Modify_Mail',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Send_SRV_Mail',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Set_Embargo','Set an embargo on all the documents of a given record.'); INSERT INTO sbmALLFUNCDESCR VALUES ('Stamp_Replace_Single_File_Approval','Stamp a single file when a document is approved.'); INSERT INTO sbmALLFUNCDESCR VALUES ('Stamp_Uploaded_Files','Stamp some of the files that were uploaded during a submission.'); INSERT INTO sbmALLFUNCDESCR VALUES ('Test_Status',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Update_Approval_DB',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('User_is_Record_Owner_or_Curator','Check if user is owner or special editor of a record'); INSERT INTO sbmALLFUNCDESCR VALUES 
('Move_Files_to_Storage','Attach files received from chosen file input element(s)'); INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Revised_Files_to_Storage','Revise files initially uploaded with "Move_Files_to_Storage"'); INSERT INTO sbmALLFUNCDESCR VALUES ('Make_Dummy_MARC_XML_Record',''); INSERT INTO sbmALLFUNCDESCR VALUES ('Move_CKEditor_Files_to_Storage','Transfer files attached to the record with the CKEditor'); INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Upload_Files_Interface','Display generic interface to add/revise/delete files. To be used before function "Move_Uploaded_Files_to_Storage"'); INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Uploaded_Files_to_Storage','Attach files uploaded with "Create_Upload_Files_Interface"'); INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Photos_to_Storage','Attach/edit the pictures uploaded with the "create_photos_manager_interface()" function'); INSERT INTO sbmALLFUNCDESCR VALUES ('Link_Records','Link two records toghether via MARC'); INSERT INTO sbmALLFUNCDESCR VALUES ('Video_Processing',NULL); INSERT INTO sbmALLFUNCDESCR VALUES ('Set_RN_From_Sysno', 'Set the value of global rn variable to the report number identified by sysno (recid)'); INSERT INTO sbmALLFUNCDESCR VALUES ('Notify_URL','Access URL, possibly to post content'); INSERT INTO sbmFIELDDESC VALUES ('Upload_Photos',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a photos upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Photos_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. 
More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\n\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\nfrom invenio.websubmit_functions.Move_Photos_to_Storage import \\\r\n read_param_file, \\\r\n create_photos_manager_interface, \\\r\n get_session_id\r\n\r\n# Retrieve session id\r\ntry:\r\n # User info is defined only in MBI/MPI actions...\r\n session_id = get_session_id(None, uid, user_info) \r\nexcept:\r\n session_id = get_session_id(req, uid, {})\r\n\r\n# Retrieve context\r\nindir = curdir.split(\'/\')[-3]\r\ndoctype = curdir.split(\'/\')[-2]\r\naccess = curdir.split(\'/\')[-1]\r\n\r\n# Get the record ID, if any\r\nsysno = ParamFromFile(\"%s/%s\" % (curdir,\'SN\')).strip()\r\n\r\n\"\"\"\r\nModify below the configuration of the photos manager interface.\r\nNote: `can_reorder_photos\' parameter is not yet fully taken into consideration\r\n\r\nDocumentation of the function is available at \r\n\"\"\"\r\ntext += create_photos_manager_interface(sysno, session_id, uid,\r\n doctype, indir, curdir, access,\r\n can_delete_photos=True,\r\n can_reorder_photos=True,\r\n can_upload_photos=True,\r\n editor_width=700,\r\n editor_height=400,\r\n initial_slider_value=100,\r\n max_slider_value=200,\r\n min_slider_value=80)','0000-00-00','0000-00-00',NULL,NULL,0); INSERT INTO sbmCHECKS VALUES ('AUCheck','function AUCheck(txt) {\r\n var res=1;\r\n tmp=txt.indexOf(\"\\015\");\r\n while (tmp != -1) {\r\n left=txt.substring(0,tmp);\r\n right=txt.substring(tmp+2,txt.length);\r\n txt=left + \"\\012\" + right;\r\n tmp=txt.indexOf(\"\\015\");\r\n }\r\n tmp=txt.indexOf(\"\\012\");\r\n if (tmp==-1){\r\n line=txt;\r\n txt=\'\';}\r\n else{\r\n line=txt.substring(0,tmp);\r\n txt=txt.substring(tmp+1,txt.length);}\r\n while (line != \"\"){\r\n coma=line.indexOf(\",\");\r\n left=line.substring(0,coma);\r\n right=line.substring(coma+1,line.length);\r\n coma2=right.indexOf(\",\");\r\n space=right.indexOf(\" \");\r\n if 
((coma==-1)||(left==\"\")||(right==\"\")||(space!=0)||(coma2!=-1)){\r\n res=0;\r\n error_log=line;\r\n }\r\n tmp=txt.indexOf(\"\\012\");\r\n if (tmp==-1){\r\n line=txt;\r\n txt=\'\';}\r\n else{\r\n line=txt.substring(0,tmp-1);\r\n txt=txt.substring(tmp+1,txt.length);}\r\n }\r\n if (res == 0){\r\n alert(\"This author name cannot be managed \\: \\012\\012\" + error_log + \" \\012\\012It is not in the required format!\\012Put one author per line and a comma (,) between the name and the firstname initial letters. \\012The name is going first, followed by the firstname initial letters.\\012Do not forget the whitespace after the comma!!!\\012\\012Example \\: Put\\012\\012Le Meur, J Y \\012Baron, T \\012\\012for\\012\\012Le Meur Jean-Yves & Baron Thomas.\");\r\n return 0;\r\n } \r\n return 1; \r\n}','1998-08-18','0000-00-00','',''); INSERT INTO sbmCHECKS VALUES ('DatCheckNew','function DatCheckNew(txt) {\r\n var res=1;\r\n if (txt.length != 10){res=0;}\r\n if (txt.indexOf(\"/\") != 2){res=0;}\r\n if (txt.lastIndexOf(\"/\") != 5){res=0;}\r\n tmp=parseInt(txt.substring(0,2),10);\r\n if ((tmp > 31)||(tmp < 1)||(isNaN(tmp))){res=0;}\r\n tmp=parseInt(txt.substring(3,5),10);\r\n if ((tmp > 12)||(tmp < 1)||(isNaN(tmp))){res=0;}\r\n tmp=parseInt(txt.substring(6,10),10);\r\n if ((tmp < 1)||(isNaN(tmp))){res=0;}\r\n if (txt.length == 0){res=1;}\r\n if (res == 0){\r\n alert(\"Please enter a correct Date \\012Format: dd/mm/yyyy\");\r\n return 0;\r\n }\r\n return 1; \r\n}','0000-00-00','0000-00-00','',''); -INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. 
More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nimport os\r\nfrom invenio.websubmit_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.websubmit_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0); +INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. 
More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nimport os\r\nfrom invenio.bibdocfile_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.bibdocfile_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0); INSERT INTO sbmFORMATEXTENSION VALUES ('WORD','.doc'); INSERT INTO sbmFORMATEXTENSION VALUES ('PostScript','.ps'); INSERT INTO sbmFORMATEXTENSION VALUES ('PDF','.pdf'); INSERT INTO sbmFORMATEXTENSION VALUES ('JPEG','.jpg'); INSERT INTO sbmFORMATEXTENSION VALUES ('JPEG','.jpeg'); INSERT INTO sbmFORMATEXTENSION VALUES ('GIF','.gif'); INSERT INTO sbmFORMATEXTENSION VALUES ('PPT','.ppt'); INSERT INTO sbmFORMATEXTENSION VALUES ('HTML','.htm'); INSERT INTO sbmFORMATEXTENSION VALUES ('HTML','.html'); INSERT INTO sbmFORMATEXTENSION VALUES ('Latex','.tex'); INSERT INTO sbmFORMATEXTENSION VALUES ('Compressed PostScript','.ps.gz'); INSERT INTO sbmFORMATEXTENSION VALUES ('Tarred Tex (.tar)','.tar'); INSERT INTO 
sbmFORMATEXTENSION VALUES ('Text','.txt'); INSERT INTO sbmFUNDESC VALUES ('Get_Recid','record_search_pattern'); INSERT INTO sbmFUNDESC VALUES ('Get_Report_Number','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Send_Modify_Mail','addressesMBI'); INSERT INTO sbmFUNDESC VALUES ('Send_Modify_Mail','sourceDoc'); INSERT INTO sbmFUNDESC VALUES ('Register_Approval_Request','categ_file_appreq'); INSERT INTO sbmFUNDESC VALUES ('Register_Approval_Request','categ_rnseek_appreq'); INSERT INTO sbmFUNDESC VALUES ('Register_Approval_Request','note_file_appreq'); INSERT INTO sbmFUNDESC VALUES ('Register_Referee_Decision','decision_file'); INSERT INTO sbmFUNDESC VALUES ('Withdraw_Approval_Request','categ_file_withd'); INSERT INTO sbmFUNDESC VALUES ('Withdraw_Approval_Request','categ_rnseek_withd'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','autorngen'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','rnin'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','counterpath'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','rnformat'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','yeargen'); INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','nblength'); INSERT INTO sbmFUNDESC VALUES ('Mail_Approval_Request_to_Referee','categ_file_appreq'); INSERT INTO sbmFUNDESC VALUES ('Mail_Approval_Request_to_Referee','categ_rnseek_appreq'); INSERT INTO sbmFUNDESC VALUES ('Mail_Approval_Request_to_Referee','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Mail_Approval_Withdrawn_to_Referee','categ_file_withd'); INSERT INTO sbmFUNDESC VALUES ('Mail_Approval_Withdrawn_to_Referee','categ_rnseek_withd'); INSERT INTO sbmFUNDESC VALUES ('Mail_Submitter','authorfile'); INSERT INTO sbmFUNDESC VALUES ('Mail_Submitter','status'); INSERT INTO sbmFUNDESC VALUES ('Send_Approval_Request','authorfile'); INSERT INTO sbmFUNDESC VALUES ('Create_Modify_Interface','fieldnameMBI'); INSERT INTO 
sbmFUNDESC VALUES ('Send_Modify_Mail','fieldnameMBI'); INSERT INTO sbmFUNDESC VALUES ('Update_Approval_DB','categformatDAM'); INSERT INTO sbmFUNDESC VALUES ('Update_Approval_DB','decision_file'); INSERT INTO sbmFUNDESC VALUES ('Send_SRV_Mail','categformatDAM'); INSERT INTO sbmFUNDESC VALUES ('Send_SRV_Mail','addressesSRV'); INSERT INTO sbmFUNDESC VALUES ('Send_Approval_Request','directory'); INSERT INTO sbmFUNDESC VALUES ('Send_Approval_Request','categformatDAM'); INSERT INTO sbmFUNDESC VALUES ('Send_Approval_Request','addressesDAM'); INSERT INTO sbmFUNDESC VALUES ('Send_Approval_Request','titleFile'); INSERT INTO sbmFUNDESC VALUES ('Send_APP_Mail','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Mail_Submitter','titleFile'); INSERT INTO sbmFUNDESC VALUES ('Send_Modify_Mail','emailFile'); INSERT INTO sbmFUNDESC VALUES ('Get_Info','authorFile'); INSERT INTO sbmFUNDESC VALUES ('Get_Info','emailFile'); INSERT INTO sbmFUNDESC VALUES ('Get_Info','titleFile'); INSERT INTO sbmFUNDESC VALUES ('Make_Modify_Record','modifyTemplate'); INSERT INTO sbmFUNDESC VALUES ('Send_APP_Mail','addressesAPP'); INSERT INTO sbmFUNDESC VALUES ('Send_APP_Mail','categformatAPP'); INSERT INTO sbmFUNDESC VALUES ('Send_APP_Mail','newrnin'); INSERT INTO sbmFUNDESC VALUES ('Send_APP_Mail','decision_file'); INSERT INTO sbmFUNDESC VALUES ('Send_APP_Mail','comments_file'); INSERT INTO sbmFUNDESC VALUES ('CaseEDS','casevariable'); INSERT INTO sbmFUNDESC VALUES ('CaseEDS','casevalues'); INSERT INTO sbmFUNDESC VALUES ('CaseEDS','casesteps'); INSERT INTO sbmFUNDESC VALUES ('CaseEDS','casedefault'); INSERT INTO sbmFUNDESC VALUES ('Send_SRV_Mail','noteFile'); INSERT INTO sbmFUNDESC VALUES ('Send_SRV_Mail','emailFile'); INSERT INTO sbmFUNDESC VALUES ('Mail_Submitter','emailFile'); INSERT INTO sbmFUNDESC VALUES ('Mail_Submitter','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Mail_Submitter','newrnin'); INSERT INTO sbmFUNDESC VALUES ('Make_Record','sourceTemplate'); INSERT INTO sbmFUNDESC VALUES 
('Make_Record','createTemplate'); INSERT INTO sbmFUNDESC VALUES ('Print_Success','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Print_Success','newrnin'); INSERT INTO sbmFUNDESC VALUES ('Print_Success','status'); INSERT INTO sbmFUNDESC VALUES ('Make_Modify_Record','sourceTemplate'); INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','documenttype'); INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','iconsize'); INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_suffixes'); INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','rename'); INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_restrictions'); INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_doctypes'); INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','elementNameToDoctype'); INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createIconDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createRelatedFormats'); INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','iconsize'); INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','keepPreviousVersionDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Set_Embargo','date_file'); INSERT INTO sbmFUNDESC VALUES ('Set_Embargo','date_format'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','files_to_be_stamped'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','latex_template'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','latex_template_vars'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','stamp'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','layer'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','switch_file'); INSERT INTO sbmFUNDESC VALUES ('Make_Dummy_MARC_XML_Record','dummyrec_source_tpl'); INSERT INTO sbmFUNDESC VALUES ('Make_Dummy_MARC_XML_Record','dummyrec_create_tpl'); INSERT INTO sbmFUNDESC VALUES ('Print_Success_APP','decision_file'); INSERT INTO sbmFUNDESC VALUES 
('Print_Success_APP','newrnin'); INSERT INTO sbmFUNDESC VALUES ('Send_Delete_Mail','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Send_Delete_Mail','record_managers'); INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_rn_file'); INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_rn_format'); INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_rn_yeargen'); INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_rncateg_file'); INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_counterpath'); INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_nb_length'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','file_to_be_stamped'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','latex_template'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','latex_template_vars'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','new_file_name'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','stamp'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','layer'); INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','switch_file'); INSERT INTO sbmFUNDESC VALUES ('Move_CKEditor_Files_to_Storage','input_fields'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxsize'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','minsize'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','doctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictions'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDeleteDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canReviseDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDescribeDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canCommentDoctypes'); INSERT 
INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canKeepDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canAddFormatDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRestrictDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRenameDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canNameNewFiles'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','createRelatedFormats'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','keepDefault'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','showLinks'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','fileLabel'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','filenameLabel'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','descriptionLabel'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','commentLabel'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictionLabel'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','startDoc'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','endDoc'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','defaultFilenameDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxFilesDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','iconsize'); INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','createIconDoctypes'); INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','forceFileRevision'); INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconsize'); INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconformat'); INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_role'); INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_flag'); INSERT INTO sbmFUNDESC VALUES 
('Link_Records','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Link_Records','edsrn2'); INSERT INTO sbmFUNDESC VALUES ('Link_Records','directRelationship'); INSERT INTO sbmFUNDESC VALUES ('Link_Records','reverseRelationship'); INSERT INTO sbmFUNDESC VALUES ('Video_Processing','aspect'); INSERT INTO sbmFUNDESC VALUES ('Video_Processing','batch_template'); INSERT INTO sbmFUNDESC VALUES ('Video_Processing','title'); INSERT INTO sbmFUNDESC VALUES ('Set_RN_From_Sysno','edsrn'); INSERT INTO sbmFUNDESC VALUES ('Set_RN_From_Sysno','rep_tags'); INSERT INTO sbmFUNDESC VALUES ('Set_RN_From_Sysno','record_search_pattern'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','url'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','data'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','admin_emails'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','content_type'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','attempt_times'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','attempt_sleeptime'); INSERT INTO sbmFUNDESC VALUES ('Notify_URL','user'); INSERT INTO sbmGFILERESULT VALUES ('HTML','HTML document'); INSERT INTO sbmGFILERESULT VALUES ('WORD','data'); INSERT INTO sbmGFILERESULT VALUES ('PDF','PDF document'); INSERT INTO sbmGFILERESULT VALUES ('PostScript','PostScript document'); INSERT INTO sbmGFILERESULT VALUES ('PostScript','data '); INSERT INTO sbmGFILERESULT VALUES ('PostScript','HP Printer Job Language data'); INSERT INTO sbmGFILERESULT VALUES ('jpg','JPEG image'); INSERT INTO sbmGFILERESULT VALUES ('Compressed PostScript','gzip compressed data'); INSERT INTO sbmGFILERESULT VALUES ('Tarred Tex (.tar)','tar archive'); INSERT INTO sbmGFILERESULT VALUES ('JPEG','JPEG image'); INSERT INTO sbmGFILERESULT VALUES ('GIF','GIF'); INSERT INTO collectiondetailedrecordpagetabs VALUES (8, 'usage;comments;metadata'); INSERT INTO collectiondetailedrecordpagetabs VALUES (19, 'usage;comments;metadata'); INSERT INTO collectiondetailedrecordpagetabs VALUES (18, 'usage;comments;metadata'); INSERT INTO 
collectiondetailedrecordpagetabs VALUES (17, 'usage;comments;metadata'); INSERT INTO swrREMOTESERVER VALUES (1, 'arXiv', 'arxiv.org', 'CDS_Invenio', 'sword_invenio', 'admin', 'SWORD at arXiv', 'http://arxiv.org/abs', 'https://arxiv.org/sword-app/servicedocument', '', 0); -- end of file diff --git a/modules/webaccess/lib/access_control_config.py b/modules/webaccess/lib/access_control_config.py index b40e628ae..8c6105079 100644 --- a/modules/webaccess/lib/access_control_config.py +++ b/modules/webaccess/lib/access_control_config.py @@ -1,373 +1,373 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Invenio Access Control Config. 
""" __revision__ = \ "$Id$" # pylint: disable=C0301 from invenio.config import CFG_SITE_NAME, CFG_SITE_URL, CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, CFG_SITE_SUPPORT_EMAIL, CFG_CERN_SITE, \ CFG_OPENAIRE_SITE, CFG_SITE_RECORD, CFG_INSPIRE_SITE from invenio.messages import gettext_set_language class InvenioWebAccessFireroleError(Exception): """Just an Exception to discover if it's a FireRole problem""" pass # VALUES TO BE EXPORTED # CURRENTLY USED BY THE FILES access_control_engine.py access_control_admin.py webaccessadmin_lib.py # name of the role giving superadmin rights SUPERADMINROLE = 'superadmin' # name of the webaccess webadmin role WEBACCESSADMINROLE = 'webaccessadmin' # name of the action allowing roles to access the web administrator interface WEBACCESSACTION = 'cfgwebaccess' # name of the action allowing roles to access the web administrator interface VIEWRESTRCOLL = 'viewrestrcoll' # name of the action allowing roles to delegate the rights to other roles # ex: libraryadmin to delegate libraryworker DELEGATEADDUSERROLE = 'accdelegaterole' # max number of users to display in the drop down selects MAXSELECTUSERS = 25 # max number of users to display in a page (mainly for user area) MAXPAGEUSERS = 25 # default role definition, source: CFG_ACC_EMPTY_ROLE_DEFINITION_SRC = 'deny all' # default role definition, compiled: CFG_ACC_EMPTY_ROLE_DEFINITION_OBJ = (False, ()) # default role definition, compiled and serialized: CFG_ACC_EMPTY_ROLE_DEFINITION_SER = None # List of tags containing (multiple) emails of users who should authorize # to access the corresponding record regardless of collection restrictions. if CFG_CERN_SITE: CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS = ['859__f', '270__m', '506__m'] else: CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS = ['8560_f'] # Use external source for access control? # CFG_EXTERNAL_AUTHENTICATION -- this is a dictionary with the enabled login method. 
# The key is the name of the login method and the value is an instance of # of the login method (see /help/admin/webaccess-admin-guide#5). Set the value # to None if you wish to use the local Invenio authentication method. # CFG_EXTERNAL_AUTH_DEFAULT -- set this to the key in CFG_EXTERNAL_AUTHENTICATION # that should be considered as default login method # CFG_EXTERNAL_AUTH_USING_SSO -- set this to the login method name of an SSO # login method, if any, otherwise set this to None. # CFG_EXTERNAL_AUTH_LOGOUT_SSO -- if CFG_EXTERNAL_AUTH_USING_SSO was not None # set this to the URL that should be contacted to perform an SSO logout from invenio.external_authentication_robot import ExternalAuthRobot if CFG_CERN_SITE: import external_authentication_sso as ea_sso CFG_EXTERNAL_AUTH_USING_SSO = "CERN" CFG_EXTERNAL_AUTH_DEFAULT = CFG_EXTERNAL_AUTH_USING_SSO CFG_EXTERNAL_AUTH_LOGOUT_SSO = 'https://login.cern.ch/adfs/ls/?wa=wsignout1.0' CFG_EXTERNAL_AUTHENTICATION = { CFG_EXTERNAL_AUTH_USING_SSO : ea_sso.ExternalAuthSSO(), } elif CFG_OPENAIRE_SITE: CFG_EXTERNAL_AUTH_DEFAULT = 'Local' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "Local": None, "OpenAIRE": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False, external_id_attribute_name="id"), } elif CFG_INSPIRE_SITE: CFG_EXTERNAL_AUTH_DEFAULT = 'arXiv' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "arXiv": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True) } elif CFG_INSPIRE_SITE: # INSPIRE specific robot configuration CFG_EXTERNAL_AUTH_DEFAULT = 'Local' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "Local": None, "Robot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False, check_user_ip=2), "ZRobot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True, check_user_ip=2) } else: 
CFG_EXTERNAL_AUTH_DEFAULT = 'Local' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "Local": None, "Robot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False), "ZRobot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True) } ## If using SSO, this is the number of seconds after which the keep-alive ## SSO handler is pinged again to provide fresh SSO information. CFG_EXTERNAL_AUTH_SSO_REFRESH = 600 # default data for the add_default_settings function # Note: by default the definition is set to deny any. This won't be a problem # because userid directly connected with roles will still be allowed. # roles # name description definition DEF_ROLES = ((SUPERADMINROLE, 'superuser with all rights', 'deny any'), (WEBACCESSADMINROLE, 'WebAccess administrator', 'deny any'), ('anyuser', 'Any user', 'allow any'), ('basketusers', 'Users who can use baskets', 'allow any'), ('loanusers', 'Users who can use loans', 'allow any'), ('groupusers', 'Users who can use groups', 'allow any'), ('alertusers', 'Users who can use alerts', 'allow any'), ('messageusers', 'Users who can use messages', 'allow any'), ('holdingsusers', 'Users who can view holdings', 'allow any'), ('statisticsusers', 'Users who can view statistics', 'allow any'), ('claimpaperusers', 'Users who can perform changes to their own paper attributions without the need for an operator\'s approval', 'allow any'), ('claimpaperoperators', 'Users who can perform changes to _all_ paper attributions without the need for an operator\'s approval', 'deny any'), ('paperclaimviewers', 'Users who can view "claim my paper" facilities.', 'allow all'), ('paperattributionviewers', 'Users who can view "attribute this paper" facilities', 'allow all'), ('paperattributionlinkviewers', 'Users who can see attribution links in the search', 'allow all'), ) # Demo site roles DEF_DEMO_ROLES = (('photocurator', 'Photo collection curator', 'deny any'), ('thesesviewer', 
'Theses viewer', 'allow group "Theses viewers"'), ('thesescurator', 'Theses collection curator', 'deny any'), ('swordcurator', 'BibSword client curator', 'deny any'), ('referee_DEMOBOO_*', 'Book collection curator', 'deny any'), ('restrictedpicturesviewer', 'Restricted pictures viewer', 'deny any'), ('curator', 'Curator', 'deny any'), ('basketusers', 'Users who can use baskets', 'deny email "hyde@cds.cern.ch"\nallow any'), ('claimpaperusers', 'Users who can perform changes to their own paper attributions without the need for an operator\'s approval', 'deny email "hyde@cds.cern.ch"\nallow any'), ('submit_DEMOJRN_*', 'Users who can submit (and modify) "Atlantis Times" articles', 'deny all'), ('atlantiseditor', 'Users who can configure "Atlantis Times" journal', 'deny all'), ('commentmoderator', 'Users who can moderate comments', 'deny all'), ('poetrycommentreader', 'Users who can view comments in Poetry collection', 'deny all')) DEF_DEMO_USER_ROLES = (('jekyll@cds.cern.ch', 'thesesviewer'), ('jekyll@cds.cern.ch', 'swordcurator'), ('jekyll@cds.cern.ch', 'claimpaperusers'), ('dorian.gray@cds.cern.ch', 'referee_DEMOBOO_*'), ('balthasar.montague@cds.cern.ch', 'curator'), ('romeo.montague@cds.cern.ch', 'restrictedpicturesviewer'), ('romeo.montague@cds.cern.ch', 'swordcurator'), ('romeo.montague@cds.cern.ch', 'thesescurator'), ('juliet.capulet@cds.cern.ch', 'restrictedpicturesviewer'), ('juliet.capulet@cds.cern.ch', 'photocurator'), ('romeo.montague@cds.cern.ch', 'submit_DEMOJRN_*'), ('juliet.capulet@cds.cern.ch', 'submit_DEMOJRN_*'), ('balthasar.montague@cds.cern.ch', 'atlantiseditor'), ('romeo.montague@cds.cern.ch', 'poetrycommentreader')) # users # list of e-mail addresses DEF_USERS = [] # actions # name desc allowedkeywords optional DEF_ACTIONS = ( ('cfgwebsearch', 'configure WebSearch', '', 'no'), ('cfgbibformat', 'configure BibFormat', '', 'no'), ('cfgbibknowledge', 'configure BibKnowledge', '', 'no'), ('cfgwebsubmit', 'configure WebSubmit', '', 'no'), ('cfgbibrank', 
'configure BibRank', '', 'no'), ('cfgwebcomment', 'configure WebComment', '', 'no'), ('cfgoaiharvest', 'configure OAI Harvest', '', 'no'), ('cfgoairepository', 'configure OAI Repository', '', 'no'), ('cfgbibindex', 'configure BibIndex', '', 'no'), ('cfgbibexport', 'configure BibExport', '', 'no'), ('cfgrobotkeys', 'configure Robot keys', 'login_method,robot', 'yes'), ('cfgbibsort', 'configure BibSort', '', 'no'), ('runbibindex', 'run BibIndex', '', 'no'), ('runbibupload', 'run BibUpload', '', 'no'), ('runwebcoll', 'run webcoll', 'collection', 'yes'), ('runbibformat', 'run BibFormat', 'format', 'yes'), ('runbibclassify', 'run BibClassify', 'taxonomy', 'yes'), ('runbibtaskex', 'run BibTaskEx example', '', 'no'), ('runbibrank', 'run BibRank', '', 'no'), ('runoaiharvest', 'run oaiharvest task', '', 'no'), ('runoairepository', 'run oairepositoryupdater task', '', 'no'), ('runbibedit', 'run Record Editor', 'collection', 'yes'), ('runbibeditmulti', 'run Multi-Record Editor', '', 'no'), ('runbibdocfile', 'run Document File Manager', '', 'no'), ('runbibmerge', 'run Record Merger', '', 'no'), ('runbibswordclient', 'run BibSword client', '', 'no'), ('runwebstatadmin', 'run WebStadAdmin', '', 'no'), ('runinveniogc', 'run InvenioGC', '', 'no'), ('runbibexport', 'run BibExport', '', 'no'), ('referee', 'referee document type doctype/category categ', 'doctype,categ', 'yes'), ('submit', 'use webSubmit', 'doctype,act,categ', 'yes'), ('viewrestrdoc', 'view restricted document', 'status', 'no'), ('viewrestrcomment', 'view restricted comment', 'status', 'no'), (WEBACCESSACTION, 'configure WebAccess', '', 'no'), (DELEGATEADDUSERROLE, 'delegate subroles inside WebAccess', 'role', 'no'), (VIEWRESTRCOLL, 'view restricted collection', 'collection', 'no'), ('cfgwebjournal', 'configure WebJournal', 'name,with_editor_rights', 'no'), ('viewcomment', 'view comments', 'collection', 'no'), ('sendcomment', 'send comments', 'collection', 'no'), ('attachcommentfile', 'attach files to comments', 
'collection', 'no'), ('attachsubmissionfile', 'upload files to drop box during submission', '', 'no'), ('cfgbibexport', 'configure BibExport', '', 'no'), ('runbibexport', 'run BibExport', '', 'no'), ('usebaskets', 'use baskets', '', 'no'), ('useloans', 'use loans', '', 'no'), ('usegroups', 'use groups', '', 'no'), ('usealerts', 'use alerts', '', 'no'), ('usemessages', 'use messages', '', 'no'), ('viewholdings', 'view holdings', 'collection', 'yes'), ('viewstatistics', 'view statistics', 'collection', 'yes'), ('runbibcirculation', 'run BibCirculation', '', 'no'), ('moderatecomments', 'moderate comments', 'collection', 'no'), ('runbatchuploader', 'run batchuploader', 'collection', 'yes'), ('runbibtasklet', 'run BibTaskLet', '', 'no'), ('claimpaper_view_pid_universe', 'View the Claim Paper interface', '', 'no'), ('claimpaper_claim_own_papers', 'Clam papers to his own personID', '', 'no'), ('claimpaper_claim_others_papers', 'Claim papers for others', '', 'no'), ('claimpaper_change_own_data', 'Change data associated to his own person ID', '', 'no'), ('claimpaper_change_others_data', 'Change data of any person ID', '', 'no'), ('runbibtasklet', 'run BibTaskLet', '', 'no') ) # Default authorizations # role action arguments DEF_AUTHS = (('basketusers', 'usebaskets', {}), ('loanusers', 'useloans', {}), ('groupusers', 'usegroups', {}), ('alertusers', 'usealerts', {}), ('messageusers', 'usemessages', {}), ('holdingsusers', 'viewholdings', {}), ('statisticsusers', 'viewstatistics', {}), ('claimpaperusers', 'claimpaper_view_pid_universe', {}), ('claimpaperoperators', 'claimpaper_view_pid_universe', {}), ('claimpaperusers', 'claimpaper_claim_own_papers', {}), ('claimpaperoperators', 'claimpaper_claim_own_papers', {}), ('claimpaperoperators', 'claimpaper_claim_others_papers', {}), ('claimpaperusers', 'claimpaper_change_own_data', {}), ('claimpaperoperators', 'claimpaper_change_own_data', {}), ('claimpaperoperators', 'claimpaper_change_others_data', {}), ) # Demo site 
authorizations # role action arguments DEF_DEMO_AUTHS = ( ('photocurator', 'runwebcoll', {'collection': 'Pictures'}), ('restrictedpicturesviewer', 'viewrestrdoc', {'status': 'restricted_picture'}), ('thesesviewer', VIEWRESTRCOLL, {'collection': 'Theses'}), ('referee_DEMOBOO_*', 'referee', {'doctype': 'DEMOBOO', 'categ': '*'}), ('curator', 'cfgbibknowledge', {}), ('curator', 'runbibedit', {}), ('curator', 'runbibeditmulti', {}), ('curator', 'runbibmerge', {}), ('swordcurator', 'runbibswordclient', {}), ('thesescurator', 'runbibedit', {'collection': 'Theses'}), ('thesescurator', VIEWRESTRCOLL, {'collection': 'Theses'}), ('photocurator', 'runbibedit', {'collection': 'Pictures'}), ('referee_DEMOBOO_*', 'runbibedit', {'collection': 'Books'}), ('submit_DEMOJRN_*', 'submit', {'doctype': 'DEMOJRN', 'act': 'SBI', 'categ': '*'}), ('submit_DEMOJRN_*', 'submit', {'doctype': 'DEMOJRN', 'act': 'MBI', 'categ': '*'}), ('submit_DEMOJRN_*', 'cfgwebjournal', {'name': 'AtlantisTimes', 'with_editor_rights': 'no'}), ('atlantiseditor', 'cfgwebjournal', {'name': 'AtlantisTimes', 'with_editor_rights': 'yes'}), ('referee_DEMOBOO_*', 'runbatchuploader', {'collection': 'Books'}), ('poetrycommentreader', 'viewcomment', {'collection': 'Poetry'}), ('atlantiseditor', VIEWRESTRCOLL, {'collection': 'Atlantis Times Drafts'}), ('anyuser', 'submit', {'doctype': 'DEMOART', 'act': 'SBI', 'categ': 'ARTICLE'}), ) _ = gettext_set_language(CFG_SITE_LANG) # Activities (i.e. actions) for which exists an administrative web interface. 
CFG_ACC_ACTIVITIES_URLS = { 'runbibedit' : (_("Run Record Editor"), "%s/%s/edit/?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), 'runbibeditmulti' : (_("Run Multi-Record Editor"), "%s/%s/multiedit/?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), - 'runbibdocfile' : (_("Run Document File Manager"), "%s/submit/managedocfiles?ln=%%s" % CFG_SITE_URL), + 'runbibdocfile' : (_("Run Document File Manager"), "%s/%s/managedocfiles?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), 'runbibmerge' : (_("Run Record Merger"), "%s/%s/merge/?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), 'runbibswordclient' : (_("Run BibSword client"), "%s/bibsword/?ln=%%s" % CFG_SITE_URL), 'cfgbibknowledge' : (_("Configure BibKnowledge"), "%s/kb?ln=%%s" % CFG_SITE_URL), 'cfgbibformat' : (_("Configure BibFormat"), "%s/admin/bibformat/bibformatadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgoaiharvest' : (_("Configure OAI Harvest"), "%s/admin/oaiharvest/oaiharvestadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgoairepository' : (_("Configure OAI Repository"), "%s/admin/oairepository/oairepositoryadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgbibindex' : (_("Configure BibIndex"), "%s/admin/bibindex/bibindexadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgbibrank' : (_("Configure BibRank"), "%s/admin/bibrank/bibrankadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebaccess' : (_("Configure WebAccess"), "%s/admin/webaccess/webaccessadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebcomment' : (_("Configure WebComment"), "%s/admin/webcomment/webcommentadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebsearch' : (_("Configure WebSearch"), "%s/admin/websearch/websearchadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebsubmit' : (_("Configure WebSubmit"), "%s/admin/websubmit/websubmitadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebjournal' : (_("Configure WebJournal"), "%s/admin/webjournal/webjournaladmin.py?ln=%%s" % CFG_SITE_URL), 'cfgbibsort' : (_("Configure BibSort"), "%s/admin/bibsort/bibsortadmin.py?ln=%%s" % CFG_SITE_URL), 'runbibcirculation' : (_("Run BibCirculation"), 
"%s/admin/bibcirculation/bibcirculationadmin.py?ln=%%s" % CFG_SITE_URL), 'runbatchuploader' : (_("Run Batch Uploader"), "%s/batchuploader/metadata?ln=%%s" % CFG_SITE_URL), 'claimpaper_claim_others_papers' : (_("Run Person/Author Manager"), "%s/person/search?ln=%%s" % CFG_SITE_URL) } CFG_WEBACCESS_MSGS = { 0: 'Try to login with another account.' % (CFG_SITE_SECURE_URL), 1: '
If you think this is not correct, please contact: %s' % (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_SUPPORT_EMAIL), 2: '
If you have any questions, please write to %s' % (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_SUPPORT_EMAIL), 3: 'Guest users are not allowed, please login.' % CFG_SITE_SECURE_URL, 4: 'The site is temporarily closed for maintenance. Please come back soon.', 5: 'Authorization failure', 6: '%s temporarily closed' % CFG_SITE_NAME, 7: 'This functionality is temporarily closed due to server maintenance. Please use only the search engine in the meantime.', 8: 'Functionality temporarily closed' } CFG_WEBACCESS_WARNING_MSGS = { 0: 'Authorization granted', 1: 'You are not authorized to perform this action.', 2: 'You are not authorized to perform any action.', 3: 'The action %s does not exist.', 4: 'Unexpected error occurred.', 5: 'Missing mandatory keyword argument(s) for this action.', 6: 'Guest accounts are not authorized to perform this action.', 7: 'Not enough arguments, user ID and action name required.', 8: 'Incorrect keyword argument(s) for this action.', 9: """Account '%s' is not yet activated.""", 10: """You were not authorized by the authentication method '%s'.""", 11: """The selected login method '%s' is not the default method for this account, please try another one.""", 12: """Selected login method '%s' does not exist.""", 13: """Could not register '%s' account.""", 14: """Could not login using '%s', because this user is unknown.""", 15: """Could not login using your '%s' account, because you have introduced a wrong password.""", 16: """External authentication troubles using '%s' (maybe temporary network problems).""", 17: """You have not yet confirmed the email address for the '%s' authentication method.""", 18: """The administrator has not yet activated your account for the '%s' authentication method.""", 19: """The site is having troubles in sending you an email for confirming your email address. 
The error has been logged and will be taken care of as soon as possible.""", 20: """No roles are authorized to perform action %s with the given parameters.""" } #There are three status key that must be here: OK, REMOVED and REVOKED #the value doesn't matter at all CFG_WEB_API_KEY_STATUS = { 'OK':'OK', 'REMOVED':'REMOVED', 'REVOKED':'REVOKED', 'WARNING':'WARNING' } diff --git a/modules/webhelp/web/hacking/hacking.webdoc b/modules/webhelp/web/hacking/hacking.webdoc index 254a19d9d..c8095dda1 100644 --- a/modules/webhelp/web/hacking/hacking.webdoc +++ b/modules/webhelp/web/hacking/hacking.webdoc @@ -1,119 +1,122 @@ ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. Welcome to the Invenio Developers' corner. Before diving into the source, make sure you don't miss our user-level and admin-level documentation as well. And now, back to the source, and happy hacking!

General information, coding practices

Common Concepts
Summarizing common terms you will encounter here and there.
Coding Style
A policy we try to follow, for good or bad.
Release Numbering
Presenting the version numbering scheme adopted for Invenio stable and development releases.
Directory Organization
How the source and target directories are organized, where the sources get installed to, what is the visible URL policy, etc.
Modules Overview
Presenting a summary of various Invenio modules and their relationships.
Test Suite
Describes our unit and regression test suites.

For more developer-related information, be sure to visit Invenio wiki.

Module-specific information

BibAuthorID Internals
Describes information useful to understand how BibAuthorID works.
BibClassify Internals
Describes information useful to understand how BibClassify works, the taxonomy extensions we use, how the keyword extraction algorithm works.
BibConvert Internals
Describes information useful to understand how BibConvert works, and how the BibConvert functions can be reused.
BibFormat Internals
Describes information useful to understand how BibFormat works.
BibRank Internals
Describes information useful to understand how the various ranking methods available in BibRank work, and how they can be tweaked to produce various outputs.
BibSort Internals
Describes information useful to understand how the BibSort module works and how various data manipulations are done, stored and retrieved.
BibRecord Internals
Describes information useful to manipulate single records.
+
BibDocFile Internals
+
Describes information useful to manipulate documents within records.
+
MiscUtil Internals
Describes information useful to understand what can be found inside the miscellaneous utilities module, like database access, error management, date handling library, etc.
WebJournal Internals
Describes the WebJournal database and required MARC tags for article records.
WebSearch Internals
Describes information useful to understand the search process internals, like the different search stages, the high- and low-level API, etc.
WebAccess Internals
Describes information useful to understand the access control process internals, its API, etc.
WebStyle Internals
Describes how to customize WebDoc files, etc.
WebSubmit Internals
Describes information useful to understand the document submission internals.
BibSched Internals
Describes information useful to understand the bibliographic task scheduler internals.
diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py index f0efe9834..18d914f14 100644 --- a/modules/websearch/lib/websearch_webinterface.py +++ b/modules/websearch/lib/websearch_webinterface.py @@ -1,1160 +1,1163 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""WebSearch URL handler.""" __revision__ = "$Id$" import cgi import os import datetime import time import sys from urllib import quote from invenio import webinterface_handler_config as apache import threading #maximum number of collaborating authors etc shown in GUI MAX_COLLAB_LIST = 10 MAX_KEYWORD_LIST = 10 MAX_VENUE_LIST = 10 #tag constants AUTHOR_TAG = "100__a" AUTHOR_INST_TAG = "100__u" COAUTHOR_TAG = "700__a" COAUTHOR_INST_TAG = "700__u" VENUE_TAG = "909C4p" KEYWORD_TAG = "695__a" FKEYWORD_TAG = "6531_a" CFG_INSPIRE_UNWANTED_KEYWORDS_START = ['talk', 'conference', 'conference proceedings', 'numerical calculations', 'experimental results', 'review', 'bibliography', 'upper limit', 'lower limit', 'tables', 'search for', 'on-shell', 'off-shell', 'formula', 'lectures', 'book', 'thesis'] CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE = ['GeV', '(('] if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_NAME, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBDIR, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_RSS_I18N_COLLECTIONS, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_WILDCARD_LIMIT, \ CFG_SITE_RECORD from invenio.dbquery import Error from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import redirect_to_url, make_canonical_urlargd, drop_default_urlargd from invenio.htmlutils import get_mathjax_header from invenio.htmlutils import nmtoken_from_string from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \ collect_user_info, logoutUser, isUserSuperAdmin -from 
invenio.websubmit_webinterface import WebInterfaceFilesPages from invenio.webcomment_webinterface import WebInterfaceCommentsPages from invenio.bibcirculation_webinterface import WebInterfaceHoldingsPages from invenio.webpage import page, pageheaderonly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine import check_user_can_view_record, \ collection_reclist_cache, \ collection_restricted_p, \ create_similarly_named_authors_link_box, \ get_colID, \ get_coll_i18nname, \ get_fieldvalues_alephseq_like, \ get_most_popular_field_values, \ get_mysql_recid_from_aleph_sysno, \ guess_primary_collection_of_a_record, \ page_end, \ page_start, \ perform_request_cache, \ perform_request_log, \ perform_request_search, \ restricted_collection_cache, \ get_coll_normalised_name from invenio.search_engine_utils import get_fieldvalues from invenio.access_control_engine import acc_authorize_action from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.bibformat import format_records from invenio.bibformat_engine import get_output_formats from invenio.websearch_webcoll import get_collection from invenio.intbitset import intbitset from invenio.bibupload import find_record_from_sysno from invenio.bibrank_citation_searcher import get_cited_by_list from invenio.bibrank_downloads_indexer import get_download_weight_total from invenio.search_engine_summarizer import summarize_records from invenio.errorlib import register_exception from invenio.bibedit_webinterface import WebInterfaceEditPages from invenio.bibeditmulti_webinterface import WebInterfaceMultiEditPages from invenio.bibmerge_webinterface import WebInterfaceMergePages +from invenio.bibdocfile_webinterface import WebInterfaceManageDocFilesPages, WebInterfaceFilesPages from invenio.search_engine import get_record from invenio.shellutils import mymkdir import invenio.template websearch_templates = 
invenio.template.load('websearch') search_results_default_urlargd = websearch_templates.search_results_default_urlargd search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd try: output_formats = [output_format['attrs']['code'].lower() for output_format in \ get_output_formats(with_attributes=True).values()] except KeyError: output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx'] output_formats.extend(['hm', 't', 'h']) def wash_search_urlargd(form): """ Create canonical search arguments from those passed via web form. """ argd = wash_urlargd(form, search_results_default_urlargd) if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] # Sometimes, users pass ot=245,700 instead of # ot=245&ot=700. Normalize that. ots = [] for ot in argd['ot']: ots += ot.split(',') argd['ot'] = ots # We can either get the mode of function as # action=, or by setting action_browse or # action_search. if argd['action_browse']: argd['action'] = 'browse' elif argd['action_search']: argd['action'] = 'search' else: if argd['action'] not in ('browse', 'search'): argd['action'] = 'search' del argd['action_browse'] del argd['action_search'] return argd class WebInterfaceUnAPIPages(WebInterfaceDirectory): """ Handle /unapi set of pages.""" _exports = [''] def __call__(self, req, form): argd = wash_urlargd(form, { 'id' : (int, 0), 'format' : (str, '')}) formats_dict = get_output_formats(True) formats = {} for format in formats_dict.values(): if format['attrs']['visibility']: formats[format['attrs']['code'].lower()] = format['attrs']['content_type'] del formats_dict if argd['id'] and argd['format']: ## Translate back common format names format = { 'nlm' : 'xn', 'marcxml' : 'xm', 'dc' : 'xd', 'endnote' : 'xe', 'mods' : 'xo' }.get(argd['format'], argd['format']) if format in formats: redirect_to_url(req, '%s/%s/%s/export/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, argd['id'], format)) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_ACCEPTABLE elif argd['id']: 
return websearch_templates.tmpl_unapi(formats, identifier=argd['id']) else: return websearch_templates.tmpl_unapi(formats) index = __call__ class WebInterfaceRecordPages(WebInterfaceDirectory): """ Handling of a /CFG_SITE_RECORD/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid argd['tab'] = self.tab if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and 
(argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): if acc_authorize_action(req, 'runbibedit')[0] != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') from invenio.search_engine import record_exists, get_merged_recid # check if the current record has been deleted # and has been merged, case in which the deleted record # will be redirect to the new one record_status = record_exists(argd['recid']) merged_recid = get_merged_recid(argd['recid']) if record_status == -1 and merged_recid: url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s' url %= (str(merged_recid), argd['ln']) redirect_to_url(req, url) # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /CFG_SITE_RECORD/123 or /CFG_SITE_RECORD/123/ index = __call__ class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory): """ Handling of a /record-restricted/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self 
self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): if acc_authorize_action(req, 'runbibedit')[0] != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT record_primary_collection = guess_primary_collection_of_a_record(self.recid) if collection_restricted_p(record_primary_collection): (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') # Keep all the arguments, they might be reused in the # record page itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /CFG_SITE_RECORD/123 or 
/CFG_SITE_RECORD/123/ index = __call__ class WebInterfaceSearchResultsPages(WebInterfaceDirectory): """ Handling of the /search URL and its sub-pages. """ _exports = ['', 'authenticate', 'cache', 'log'] def __call__(self, req, form): """ Perform a search. """ argd = wash_search_urlargd(form) _ = gettext_set_language(argd['ln']) if req.method == 'POST': raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text=_("You are not authorized to view this area."), navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL == 2: ## Let's update the current collections list with all ## the restricted collections the user has rights to view. try: restricted_collections = user_info['precached_permitted_restricted_collections'] argd_collections = set(argd['c']) argd_collections.update(restricted_collections) argd['c'] = list(argd_collections) except KeyError: pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS involved_collections = set() involved_collections.update(argd['c']) involved_collections.add(argd['cc']) if argd['id'] > 0: argd['recid'] = argd['id'] if argd['idb'] > 0: argd['recidb'] = argd['idb'] if argd['sysno']: tmp_recid = find_record_from_sysno(argd['sysno']) if tmp_recid: argd['recid'] = tmp_recid if argd['sysnb']: tmp_recid = find_record_from_sysno(argd['sysnb']) if tmp_recid: argd['recidb'] = tmp_recid if argd['recid'] > 0: if argd['recidb'] > argd['recid']: # Hack to check if among the restricted collections # at least a record of the range is there and # then if the user is not authorized for that # 
collection. recids = intbitset(xrange(argd['recid'], argd['recidb'])) restricted_collection_cache.recreate_cache_if_needed() for collname in restricted_collection_cache.cache: (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname) if auth_code and user_info['email'] == 'guest': coll_recids = get_collection(collname).reclist if coll_recids & recids: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') else: involved_collections.add(guess_primary_collection_of_a_record(argd['recid'])) # If any of the collection requires authentication, redirect # to the authentication form. for coll in involved_collections: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): auth_code, auth_message = acc_authorize_action(req, 'runbibedit') if auth_code != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT # Keep all the arguments, they might be reused in the # search_engine itself 
to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out def cache(self, req, form): """Search cache page.""" argd = wash_urlargd(form, {'action': (str, 'show')}) return perform_request_cache(req, action=argd['action']) def log(self, req, form): """Search log page.""" argd = wash_urlargd(form, {'date': (str, '')}) return perform_request_log(req, date=argd['date']) def authenticate(self, req, form): """Restricted search results pages.""" argd = wash_search_urlargd(form) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): auth_code, auth_message = acc_authorize_action(req, 'runbibedit') if auth_code != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd uid = getUid(req) if uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # mod_python does not like to return [] 
in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out index = __call__ class WebInterfaceLegacySearchPages(WebInterfaceDirectory): """ Handling of the /search.py URL and its sub-pages. """ _exports = ['', ('authenticate', 'index')] def __call__(self, req, form): """ Perform a search. """ argd = wash_search_urlargd(form) # We either jump into the generic search form, or the specific # /CFG_SITE_RECORD/... display if a recid is requested if argd['recid'] != -1: target = '/%s/%d' % (CFG_SITE_RECORD, argd['recid']) del argd['recid'] else: target = '/search' target += make_canonical_urlargd(argd, search_results_default_urlargd) return redirect_to_url(req, target, apache.HTTP_MOVED_PERMANENTLY) index = __call__ # Parameters for the legacy URLs, of the form /?c=ALEPH legacy_collection_default_urlargd = { 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0), 'c': (str, CFG_SITE_NAME)} class WebInterfaceSearchInterfacePages(WebInterfaceDirectory): """ Handling of collection navigation.""" _exports = [('index.py', 'legacy_collection'), ('', 'legacy_collection'), ('search.py', 'legacy_search'), 'search', 'openurl', 'opensearchdescription', 'logout_SSO_hook'] search = WebInterfaceSearchResultsPages() legacy_search = WebInterfaceLegacySearchPages() def logout_SSO_hook(self, req, form): """Script triggered by the display of the centralized SSO logout dialog. 
It logouts the user from Invenio and stream back the expected picture.""" logoutUser(req) req.content_type = 'image/gif' req.encoding = None req.filename = 'wsignout.gif' req.headers_out["Content-Disposition"] = "inline; filename=wsignout.gif" req.set_content_length(os.path.getsize('%s/img/wsignout.gif' % CFG_WEBDIR)) req.send_http_header() req.sendfile('%s/img/wsignout.gif' % CFG_WEBDIR) def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for collections and records)""" if component == 'collection': c = '/'.join(path) def answer(req, form): """Accessing collections cached pages.""" # Accessing collections: this is for accessing the # cached page on top of each collection. argd = wash_urlargd(form, search_interface_default_urlargd) # We simply return the cached page of the collection argd['c'] = c if not argd['c']: # collection argument not present; display # home collection by default argd['c'] = CFG_SITE_NAME # Treat `as' argument specially: if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] return display_collection(req, **argd) return answer, [] elif component == CFG_SITE_RECORD and path and path[0] == 'merge': return WebInterfaceMergePages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'edit': return WebInterfaceEditPages(), path[1:] elif component == CFG_SITE_RECORD and path and path[0] == 'multiedit': return WebInterfaceMultiEditPages(), path[1:] + elif component == CFG_SITE_RECORD and path and path[0] in ('managedocfiles', 'managedocfilesasync'): + return WebInterfaceManageDocFilesPages(), path + elif component == CFG_SITE_RECORD or component == 'record-restricted': try: if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: # let us try to recognize // style of URLs: # check for SYSNOs with an embedded slash; needed for [ARXIVINV-15] if len(path) > 1 and get_mysql_recid_from_aleph_sysno(path[0] + "/" + path[1]): path[0] = path[0] + "/" + path[1] del path[1] x = get_mysql_recid_from_aleph_sysno(path[0]) if x: 
recid = x else: recid = int(path[0]) else: recid = int(path[0]) except IndexError: # display record #1 for URL /CFG_SITE_RECORD without a number recid = 1 except ValueError: if path[0] == '': # display record #1 for URL /CFG_SITE_RECORD/ without a number recid = 1 else: # display page not found for URLs like /CFG_SITE_RECORD/foo return None, [] from invenio.intbitset import __maxelem__ if recid <= 0 or recid > __maxelem__: # __maxelem__ = 2147483647 # display page not found for URLs like /CFG_SITE_RECORD/-5 or /CFG_SITE_RECORD/0 or /CFG_SITE_RECORD/2147483649 return None, [] format = None tab = '' try: if path[1] in ['', 'files', 'reviews', 'comments', 'usage', 'references', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots']: tab = path[1] elif path[1] == 'export': tab = '' format = path[2] # format = None # elif path[1] in output_formats: # tab = '' # format = path[1] else: # display page not found for URLs like /CFG_SITE_RECORD/references # for a collection where 'references' tabs is not visible return None, [] except IndexError: # Keep normal url if tabs is not specified pass #if component == 'record-restricted': #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:] #else: return WebInterfaceRecordPages(recid, tab, format), path[1:] elif component == 'sslredirect': ## Fallback solution for sslredirect special path that should ## be rather implemented as an Apache level redirection def redirecter(req, form): real_url = "http://" + '/'.join(path) redirect_to_url(req, real_url) return redirecter, [] return None, [] def openurl(self, req, form): """ OpenURL Handler.""" argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args) ret_url = websearch_templates.tmpl_openurl2invenio(argd) if ret_url: return redirect_to_url(req, ret_url) else: return redirect_to_url(req, CFG_SITE_URL) def opensearchdescription(self, req, form): """OpenSearch description file""" req.content_type = 
"application/opensearchdescription+xml" req.send_http_header() argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0) }) return websearch_templates.tmpl_opensearch_description(ln=argd['ln']) def legacy_collection(self, req, form): """Collection URL backward compatibility handling.""" accepted_args = dict(legacy_collection_default_urlargd) argd = wash_urlargd(form, accepted_args) # Treat `as' argument specially: if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] # If we specify no collection, then we don't need to redirect # the user, so that accessing returns the # default collection. if not form.has_key('c'): return display_collection(req, **argd) # make the collection an element of the path, and keep the # other query elements as is. If the collection is CFG_SITE_NAME, # however, redirect to the main URL. c = argd['c'] del argd['c'] if c == CFG_SITE_NAME: target = '/' else: target = '/collection/' + quote(c) # Treat `as' argument specially: # We are going to redirect, so replace `aas' by `as' visible argument: if argd.has_key('aas'): argd['as'] = argd['aas'] del argd['aas'] target += make_canonical_urlargd(argd, legacy_collection_default_urlargd) return redirect_to_url(req, target) def display_collection(req, c, aas, verbose, ln): """Display search interface page for collection c by looking in the collection cache.""" _ = gettext_set_language(ln) req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln}, search_interface_default_urlargd) # get user ID: try: uid = getUid(req) user_preferences = {} if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this collection", navmenuid='search') elif uid > 0: user_preferences = get_user_preferences(uid) except Error: register_exception(req=req, alert_admin=True) return page(title=_("Internal Error"), body=create_error_box(req, verbose=verbose, ln=ln), description="%s - Internal Error" % CFG_SITE_NAME, keywords="%s, Internal Error" % 
CFG_SITE_NAME, language=ln, req=req, navmenuid='search') # start display: req.content_type = "text/html" req.send_http_header() # deduce collection id: colID = get_colID(get_coll_normalised_name(c)) if type(colID) is not int: page_body = '

' + (_("Sorry, collection %s does not seem to exist.") % ('' + str(c) + '')) + '

' page_body = '

' + (_("You may want to start browsing from %s.") % ('' + get_coll_i18nname(CFG_SITE_NAME, ln) + '')) + '

' if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND return page(title=_("Collection %s Not Found") % cgi.escape(c), body=page_body, description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))), keywords="%s" % CFG_SITE_NAME, uid=uid, language=ln, req=req, navmenuid='search') # wash `aas' argument: if not os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln)): # nonexistent `aas' asked for, fall back to Simple Search: aas = 0 # display collection interface page: try: filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln), "r") c_navtrail = filedesc.read() filedesc.close() except: c_navtrail = "" try: filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % \ (CFG_CACHEDIR, colID, aas, ln), "r") c_body = filedesc.read() filedesc.close() except: c_body = "" try: filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_tp = filedesc.read() filedesc.close() except: c_portalbox_tp = "" try: filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_te = filedesc.read() filedesc.close() except: c_portalbox_te = "" try: filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_lt = filedesc.read() filedesc.close() except: c_portalbox_lt = "" try: # show help boxes (usually located in "tr", "top right") # if users have not banned them in their preferences: c_portalbox_rt = "" if user_preferences.get('websearch_helpbox', 1) > 0: filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_portalbox_rt = filedesc.read() filedesc.close() except: c_portalbox_rt = "" try: filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % \ (CFG_CACHEDIR, colID, ln), "r") c_last_updated = filedesc.read() filedesc.close() except: c_last_updated = "" try: title = 
get_coll_i18nname(c, ln) except: title = "" show_title_p = True body_css_classes = [] if c == CFG_SITE_NAME: # Do not display title on home collection show_title_p = False body_css_classes.append('home') if len(collection_reclist_cache.cache.keys()) == 1: # if there is only one collection defined, do not print its # title on the page as it would be displayed repetitively. show_title_p = False if aas == -1: show_title_p = False if CFG_INSPIRE_SITE == 1: # INSPIRE should never show title, but instead use css to # style collections show_title_p = False body_css_classes.append(nmtoken_from_string(c)) # RSS: rssurl = CFG_SITE_URL + '/rss' rssurl_params = [] if c != CFG_SITE_NAME: rssurl_params.append('cc=' + quote(c)) if ln != CFG_SITE_LANG and \ c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS: rssurl_params.append('ln=' + ln) if rssurl_params: rssurl += '?' + '&'.join(rssurl_params) if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS: metaheaderadd = get_mathjax_header(req.is_https()) else: metaheaderadd = '' return page(title=title, body=c_body, navtrail=c_navtrail, description="%s - %s" % (CFG_SITE_NAME, c), keywords="%s, %s" % (CFG_SITE_NAME, c), metaheaderadd=metaheaderadd, uid=uid, language=ln, req=req, cdspageboxlefttopadd=c_portalbox_lt, cdspageboxrighttopadd=c_portalbox_rt, titleprologue=c_portalbox_tp, titleepilogue=c_portalbox_te, lastupdated=c_last_updated, navmenuid='search', rssurl=rssurl, body_css_classes=body_css_classes, show_title_p=show_title_p) class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory): """RSS 2.0 feed service pages.""" def __call__(self, req, form): """RSS 2.0 feed service.""" # Keep only interesting parameters for the search default_params = websearch_templates.rss_default_urlargd # We need to keep 'jrec' and 'rg' here in order to have # 'multi-page' RSS. These parameters are not kept be default # as we don't want to consider them when building RSS links # from search and browse pages. 
default_params.update({'jrec':(int, 1), 'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)}) argd = wash_urlargd(form, default_params) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # Create a standard filename with these parameters current_url = websearch_templates.build_rss_url(argd) cache_filename = current_url.split('/')[-1] # In the same way as previously, add 'jrec' & 'rg' req.content_type = "application/rss+xml" req.send_http_header() try: # Try to read from cache path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) # Check if cache needs refresh filedesc = open(path, "r") last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime) assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL)) c_rss = filedesc.read() filedesc.close() req.write(c_rss) return except Exception, e: # do it live and cache previous_url = None if argd['jrec'] > 1: prev_jrec = argd['jrec'] - argd['rg'] if prev_jrec < 1: prev_jrec = 1 previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec) #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): if acc_authorize_action(req, 'runbibedit')[0] != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT recIDs = 
perform_request_search(req, of="id", c=argd['c'], cc=argd['cc'], p=argd['p'], f=argd['f'], p1=argd['p1'], f1=argd['f1'], m1=argd['m1'], op1=argd['op1'], p2=argd['p2'], f2=argd['f2'], m2=argd['m2'], op2=argd['op2'], p3=argd['p3'], f3=argd['f3'], m3=argd['m3'], wl=argd['wl']) nb_found = len(recIDs) next_url = None if len(recIDs) >= argd['jrec'] + argd['rg']: next_url = websearch_templates.build_rss_url(argd, jrec=(argd['jrec'] + argd['rg'])) first_url = websearch_templates.build_rss_url(argd, jrec=1) last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1) recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1] rss_prologue = '\n' + \ websearch_templates.tmpl_xml_rss_prologue(current_url=current_url, previous_url=previous_url, next_url=next_url, first_url=first_url, last_url=last_url, nb_found=nb_found, jrec=argd['jrec'], rg=argd['rg'], cc=argd['cc']) + '\n' req.write(rss_prologue) rss_body = format_records(recIDs, of='xr', ln=argd['ln'], user_info=user_info, record_separator="\n", req=req, epilogue="\n") rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n' req.write(rss_epilogue) # update cache dirname = "%s/rss" % (CFG_CACHEDIR) mymkdir(dirname) fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) try: # Remove the file just in case it already existed # so that a bit of space is created os.remove(fullfilename) except OSError: pass # Check if there's enough space to cache the request. if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS: try: os.umask(022) f = open(fullfilename, "w") f.write(rss_prologue + rss_body + rss_epilogue) f.close() except IOError, v: if v[0] == 36: # URL was too long. 
Never mind, don't cache pass else: raise repr(v) index = __call__ class WebInterfaceRecordExport(WebInterfaceDirectory): """ Handling of a ///export/ URL fragment """ _exports = output_formats def __init__(self, recid, format=None): self.recid = recid self.format = format for output_format in output_formats: self.__dict__[output_format] = self return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # Check if the record belongs to a restricted primary # collection. If yes, redirect to the authenticated URL. user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0: argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS #check if the user has rights to set a high wildcard limit #if not, reduce the limit set by user, with the default one if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0): if acc_authorize_action(req, 'runbibedit')[0] != 0: argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return 
page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /CFG_SITE_RECORD/123/export/xm or /CFG_SITE_RECORD/123/export/xm/ index = __call__ diff --git a/modules/websession/lib/websession_templates.py b/modules/websession/lib/websession_templates.py index 1ed5a8dd8..92f6ce047 100644 --- a/modules/websession/lib/websession_templates.py +++ b/modules/websession/lib/websession_templates.py @@ -1,2566 +1,2566 @@ ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
__revision__ = "$Id$" import urllib import cgi from invenio.config import \ CFG_CERN_SITE, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_SITE_URL, \ CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS, \ CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS, \ CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_SITE_RECORD from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO, \ CFG_EXTERNAL_AUTH_LOGOUT_SSO, CFG_WEB_API_KEY_STATUS from invenio.urlutils import make_canonical_urlargd, create_url, create_html_link from invenio.htmlutils import escape_html, nmtoken_from_string from invenio.messages import gettext_set_language, language_list_long from invenio.websession_config import CFG_WEBSESSION_GROUP_JOIN_POLICY class Template: def tmpl_back_form(self, ln, message, url, link): """ A standard one-message-go-back-link page. Parameters: - 'ln' *string* - The language to display the interface in - 'message' *string* - The message to display - 'url' *string* - The url to go back to - 'link' *string* - The link text """ out = """
%(message)s %(link)s
"""% { 'message' : message, 'url' : url, 'link' : link, 'ln' : ln } return out def tmpl_external_setting(self, ln, key, value): _ = gettext_set_language(ln) out = """ %s: %s """ % (key, value) return out def tmpl_external_user_settings(self, ln, html_settings): _ = gettext_set_language(ln) out = """

%(external_user_settings)s

%(html_settings)s

%(external_user_groups)s

%(consult_external_groups)s

""" % { 'external_user_settings' : _('External account settings'), 'html_settings' : html_settings, 'consult_external_groups' : _('You can consult the list of your external groups directly in the %(x_url_open)sgroups page%(x_url_close)s.') % { 'x_url_open' : '' % ln, 'x_url_close' : '' }, 'external_user_groups' : _('External user groups'), } return out def tmpl_user_api_key(self, ln=CFG_SITE_LANG, keys_info=None): """ Displays all the API key that the user owns the user Parameters: - 'ln' *string* - The language to display the interface in - 'key_info' *tuples* - Contains the tuples with the key data (id, desciption, status) """ # load the right message language _ = gettext_set_language(ln) out = """

%(user_api_key)s

""" % { 'user_api_key' : _("API keys") } if keys_info and len(keys_info) != 0: out += "

%(user_keys)s

" % {'user_keys': _("These are your current API keys")} out += """ """ for key_info in keys_info: out += """ """ % { 'key_description': _("Description: " + cgi.escape(key_info[1])), 'key_status': _("Status: " + key_info[2]), 'key_id': _(key_info[0]), 'index': keys_info.index(key_info), 'key_label': _("API key"), 'remove_key' : _("Delete key"), 'sitesecureurl': CFG_SITE_SECURE_URL, 'input_type': ("submit", "hidden")[key_info[2] == CFG_WEB_API_KEY_STATUS['REVOKED']] } out += "
%(key_description)s %(key_status)s
%(key_label)s %(key_id)s
" out += """

%(create_new_key)s


(%(mandatory)s)

%(note)s: %(new_key_description_note)s
""" % { 'create_new_key' : _("If you want to create a new API key, please enter a description for it"), 'new_key_description_label' : _("Description for the new API key"), 'mandatory' : _("mandatory"), 'note' : _("Note"), 'new_key_description_note': _("The description should be something meaningful for you to recognize the API key"), 'create_new_key_button' : _("Create new key"), 'sitesecureurl': CFG_SITE_SECURE_URL } return out def tmpl_user_preferences(self, ln, email, email_disabled, password_disabled, nickname): """ Displays a form for the user to change his email/password. Parameters: - 'ln' *string* - The language to display the interface in - 'email' *string* - The email of the user - 'email_disabled' *boolean* - If the user has the right to edit his email - 'password_disabled' *boolean* - If the user has the right to edit his password - 'nickname' *string* - The nickname of the user (empty string if user does not have it) """ # load the right message language _ = gettext_set_language(ln) out = """

%(edit_params)s

%(change_user)s


(%(mandatory)s)
%(nickname_prefix)s%(nickname)s%(nickname_suffix)s
%(note)s: %(fixed_nickname_note)s

(%(mandatory)s)

%(example)s: john.doe@example.com
   
""" % { 'change_user' : _("If you want to change your email or set for the first time your nickname, please set new values in the form below."), 'edit_params' : _("Edit login credentials"), 'nickname_label' : _("Nickname"), 'nickname' : nickname, 'nickname_prefix' : nickname=='' and '
'+_("Example")+':johnd' or '', 'new_email' : _("New email address"), 'mandatory' : _("mandatory"), 'example' : _("Example"), 'note' : _("Note"), 'set_values' : _("Set new values"), 'email' : email, 'email_disabled' : email_disabled and "readonly" or "", 'sitesecureurl': CFG_SITE_SECURE_URL, 'fixed_nickname_note' : _('Since this is considered as a signature for comments and reviews, once set it can not be changed.') } if not password_disabled and not CFG_EXTERNAL_AUTH_USING_SSO: out += """

%(change_pass)s



%(note)s: %(old_password_note)s


%(note)s: %(password_note)s
   
""" % { 'change_pass' : _("If you want to change your password, please enter the old one and set the new value in the form below."), 'mandatory' : _("mandatory"), 'old_password' : _("Old password"), 'new_password' : _("New password"), 'optional' : _("optional"), 'note' : _("Note"), 'password_note' : _("The password phrase may contain punctuation, spaces, etc."), 'old_password_note' : _("You must fill the old password in order to set a new one."), 'retype_password' : _("Retype password"), 'set_values' : _("Set new password"), 'password_disabled' : password_disabled and "disabled" or "", 'sitesecureurl': CFG_SITE_SECURE_URL, } elif not CFG_EXTERNAL_AUTH_USING_SSO and CFG_CERN_SITE: out += "

" + _("""If you are using a lightweight CERN account you can %(x_url_open)sreset the password%(x_url_close)s.""") % \ {'x_url_open' : \ '' \ % (make_canonical_urlargd({'email': email, 'returnurl' : CFG_SITE_SECURE_URL + '/youraccount/edit' + make_canonical_urlargd({'lang' : ln}, {})}, {})), 'x_url_close' : ''} + "

" elif CFG_EXTERNAL_AUTH_USING_SSO and CFG_CERN_SITE: out += "

" + _("""You can change or reset your CERN account password by means of the %(x_url_open)sCERN account system%(x_url_close)s.""") % \ {'x_url_open' : '', 'x_url_close' : ''} + "

" return out def tmpl_user_bibcatalog_auth(self, bibcatalog_username="", bibcatalog_password="", ln=CFG_SITE_LANG): """template for setting username and pw for bibcatalog backend""" _ = gettext_set_language(ln) out = """

%(edit_bibcatalog_settings)s

%(username)s: %(password)s:
""" % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'bibcatalog_username' : bibcatalog_username, 'bibcatalog_password' : bibcatalog_password, 'edit_bibcatalog_settings' : _("Edit cataloging interface settings"), 'username' : _("Username"), 'password' : _("Password"), 'update_settings' : _('Update settings') } return out def tmpl_user_lang_edit(self, ln, preferred_lang): _ = gettext_set_language(ln) out = """

%(edit_lang_settings)s

""" % { 'select_lang' : _('Select desired language of the web interface.'), 'update_settings' : _('Update settings') } return out def tmpl_user_websearch_edit(self, ln, current = 10, show_latestbox = True, show_helpbox = True): _ = gettext_set_language(ln) out = """

%(edit_websearch_settings)s

""" % { 'update_settings' : _("Update settings"), 'select_group_records' : _("Number of search results per page"), } return out def tmpl_user_external_auth(self, ln, methods, current, method_disabled): """ Displays a form for the user to change his authentication method. Parameters: - 'ln' *string* - The language to display the interface in - 'methods' *array* - The methods of authentication - 'method_disabled' *boolean* - If the user has the right to change this - 'current' *string* - The currently selected method """ # load the right message language _ = gettext_set_language(ln) out = """
%(edit_method)s

%(explain_method)s:

%(select_method)s: """ % { 'edit_method' : _("Edit login method"), 'explain_method' : _("Please select which login method you would like to use to authenticate yourself"), 'select_method' : _("Select method"), 'sitesecureurl': CFG_SITE_SECURE_URL, } for system in methods: out += """
""" % { 'system' : system, 'disabled' : method_disabled and 'disabled="disabled"' or "", 'selected' : current == system and 'checked="checked"' or "", 'id' : nmtoken_from_string(system), } out += """
 
""" % { 'select_method' : _("Select method"), } return out def tmpl_lost_password_form(self, ln): """ Displays a form for the user to ask for his password sent by email. Parameters: - 'ln' *string* - The language to display the interface in - 'msg' *string* - Explicative message on top of the form. """ # load the right message language _ = gettext_set_language(ln) out = "

" + _("If you have lost the password for your %(sitename)s %(x_fmt_open)sinternal account%(x_fmt_close)s, then please enter your email address in the following form in order to have a password reset link emailed to you.") % {'x_fmt_open' : '', 'x_fmt_close' : '', 'sitename' : CFG_SITE_NAME_INTL[ln]} + "

" out += """
 
""" % { 'ln': ln, 'email' : _("Email address"), 'send' : _("Send password reset link"), } if CFG_CERN_SITE: out += "

" + _("If you have been using the %(x_fmt_open)sCERN login system%(x_fmt_close)s, then you can recover your password through the %(x_url_open)sCERN authentication system%(x_url_close)s.") % {'x_fmt_open' : '', 'x_fmt_close' : '', 'x_url_open' : '' \ % make_canonical_urlargd({'lf': 'auth', 'returnURL' : CFG_SITE_SECURE_URL + '/youraccount/login?ln='+ln}, {}), 'x_url_close' : ''} + " " else: out += "

" + _("Note that if you have been using an external login system, then we cannot do anything and you have to ask there.") + " " out += _("Alternatively, you can ask %s to change your login system from external to internal.") % ("""%(email)s""" % { 'email' : CFG_SITE_SUPPORT_EMAIL }) + "

" return out def tmpl_account_info(self, ln, uid, guest, CFG_CERN_SITE): """ Displays the account information Parameters: - 'ln' *string* - The language to display the interface in - 'uid' *string* - The user id - 'guest' *boolean* - If the user is guest - 'CFG_CERN_SITE' *boolean* - If the site is a CERN site """ # load the right message language _ = gettext_set_language(ln) out = """

%(account_offer)s

""" % { 'account_offer' : _("%s offers you the possibility to personalize the interface, to set up your own personal library of documents, or to set up an automatic alert query that would run periodically and would notify you of search results by email.") % CFG_SITE_NAME_INTL[ln], } if not guest: out += """
%(your_settings)s
%(change_account)s
""" % { 'ln' : ln, 'your_settings' : _("Your Settings"), 'change_account' : _("Set or change your account email address or password. Specify your preferences about the look and feel of the interface.") } out += """
%(your_searches)s
%(search_explain)s
""" % { 'ln' : ln, 'your_searches' : _("Your Searches"), 'search_explain' : _("View all the searches you performed during the last 30 days."), } out += """
%(your_baskets)s
%(basket_explain)s""" % { 'ln' : ln, 'your_baskets' : _("Your Baskets"), 'basket_explain' : _("With baskets you can define specific collections of items, store interesting records you want to access later or share with others."), } if guest and CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS: out += self.tmpl_warning_guest_user(ln = ln, type = "baskets") out += """
%(your_alerts)s
%(explain_alerts)s""" % { 'ln' : ln, 'your_alerts' : _("Your Alerts"), 'explain_alerts' : _("Subscribe to a search which will be run periodically by our service. The result can be sent to you via Email or stored in one of your baskets."), } if guest and CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS: out += self.tmpl_warning_guest_user(type="alerts", ln = ln) out += "
" if CFG_CERN_SITE: out += """
%(your_loans)s
%(explain_loans)s
""" % { 'your_loans' : _("Your Loans"), 'explain_loans' : _("Check out book you have on loan, submit borrowing requests, etc. Requires CERN ID."), 'ln': ln, 'CFG_SITE_SECURE_URL': CFG_SITE_SECURE_URL } out += """
""" return out def tmpl_warning_guest_user(self, ln, type): """ Displays a warning message about the specified type Parameters: - 'ln' *string* - The language to display the interface in - 'type' *string* - The type of data that will get lost in case of guest account (for the moment: 'alerts' or 'baskets') """ # load the right message language _ = gettext_set_language(ln) if (type=='baskets'): msg = _("You are logged in as a guest user, so your baskets will disappear at the end of the current session.") + ' ' elif (type=='alerts'): msg = _("You are logged in as a guest user, so your alerts will disappear at the end of the current session.") + ' ' msg += _("If you wish you can %(x_url_open)slogin or register here%(x_url_close)s.") % {'x_url_open': '', 'x_url_close': ''} return """
%s
""" % msg def tmpl_account_body(self, ln, user): """ Displays the body of the actions of the user Parameters: - 'ln' *string* - The language to display the interface in - 'user' *string* - The username (nickname or email) """ # load the right message language _ = gettext_set_language(ln) out = _("You are logged in as %(x_user)s. You may want to a) %(x_url1_open)slogout%(x_url1_close)s; b) edit your %(x_url2_open)saccount settings%(x_url2_close)s.") %\ {'x_user': user, 'x_url1_open': '', 'x_url1_close': '', 'x_url2_open': '', 'x_url2_close': '', } return out + "

" def tmpl_account_template(self, title, body, ln, url): """ Displays a block of the your account page Parameters: - 'ln' *string* - The language to display the interface in - 'title' *string* - The title of the block - 'body' *string* - The body of the block - 'url' *string* - The URL to go to the proper section """ out ="""
%s
%s
""" % (url, title, body) return out def tmpl_account_page(self, ln, warnings, warning_list, accBody, baskets, alerts, searches, messages, loans, groups, submissions, approvals, tickets, administrative): """ Displays the your account page Parameters: - 'ln' *string* - The language to display the interface in - 'accBody' *string* - The body of the heading block - 'baskets' *string* - The body of the baskets block - 'alerts' *string* - The body of the alerts block - 'searches' *string* - The body of the searches block - 'messages' *string* - The body of the messages block - 'groups' *string* - The body of the groups block - 'submissions' *string* - The body of the submission block - 'approvals' *string* - The body of the approvals block - 'administrative' *string* - The body of the administrative block """ # load the right message language _ = gettext_set_language(ln) out = "" if warnings == "1": out += self.tmpl_general_warnings(warning_list) out += self.tmpl_account_template(_("Your Account"), accBody, ln, '/youraccount/edit?ln=%s' % ln) if messages: out += self.tmpl_account_template(_("Your Messages"), messages, ln, '/yourmessages/display?ln=%s' % ln) if loans: out += self.tmpl_account_template(_("Your Loans"), loans, ln, '/yourloans/display?ln=%s' % ln) if baskets: out += self.tmpl_account_template(_("Your Baskets"), baskets, ln, '/yourbaskets/display?ln=%s' % ln) if alerts: out += self.tmpl_account_template(_("Your Alert Searches"), alerts, ln, '/youralerts/list?ln=%s' % ln) if searches: out += self.tmpl_account_template(_("Your Searches"), searches, ln, '/youralerts/display?ln=%s' % ln) if groups: groups_description = _("You can consult the list of %(x_url_open)syour groups%(x_url_close)s you are administering or are a member of.") groups_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Groups"), groups_description, ln, '/yourgroups/display?ln=%s' % ln) if submissions: submission_description = _("You can consult the 
list of %(x_url_open)syour submissions%(x_url_close)s and inquire about their status.") submission_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Submissions"), submission_description, ln, '/yoursubmissions.py?ln=%s' % ln) if approvals: approval_description = _("You can consult the list of %(x_url_open)syour approvals%(x_url_close)s with the documents you approved or refereed.") approval_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Approvals"), approval_description, ln, '/yourapprovals.py?ln=%s' % ln) #check if this user might have tickets if tickets: ticket_description = _("You can consult the list of %(x_url_open)syour tickets%(x_url_close)s.") ticket_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Tickets"), ticket_description, ln, '/yourtickets?ln=%s' % ln) if administrative: out += self.tmpl_account_template(_("Your Administrative Activities"), administrative, ln, '/admin') return out def tmpl_account_emailMessage(self, ln, msg): """ Displays a link to retrieve the lost password Parameters: - 'ln' *string* - The language to display the interface in - 'msg' *string* - Explicative message on top of the form. """ # load the right message language _ = gettext_set_language(ln) out ="" out +=""" %(msg)s %(try_again)s """ % { 'ln' : ln, 'msg' : msg, 'try_again' : _("Try again") } return out def tmpl_account_reset_password_email_body(self, email, reset_key, ip_address, ln=CFG_SITE_LANG): """ The body of the email that sends lost internal account passwords to users. """ _ = gettext_set_language(ln) out = """ %(intro)s %(intro2)s <%(link)s> %(outro)s %(outro2)s""" % { 'intro': _("Somebody (possibly you) coming from %(x_ip_address)s " "has asked\nfor a password reset at %(x_sitename)s\nfor " "the account \"%(x_email)s\"." 
% { 'x_sitename' :CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'x_email' : email, 'x_ip_address' : ip_address, } ), 'intro2' : _("If you want to reset the password for this account, please go to:"), 'link' : "%s/youraccount/access%s" % (CFG_SITE_SECURE_URL, make_canonical_urlargd({ 'ln' : ln, 'mailcookie' : reset_key }, {})), 'outro' : _("in order to confirm the validity of this request."), 'outro2' : _("Please note that this URL will remain valid for about %(days)s days only.") % {'days': CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS}, } return out def tmpl_account_address_activation_email_body(self, email, address_activation_key, ip_address, ln=CFG_SITE_LANG): """ The body of the email that sends email address activation cookie passwords to users. """ _ = gettext_set_language(ln) out = """ %(intro)s %(intro2)s <%(link)s> %(outro)s %(outro2)s""" % { 'intro': _("Somebody (possibly you) coming from %(x_ip_address)s " "has asked\nto register a new account at %(x_sitename)s\nfor the " "email address \"%(x_email)s\"." 
% { 'x_sitename' :CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'x_email' : email, 'x_ip_address' : ip_address, } ), 'intro2' : _("If you want to complete this account registration, please go to:"), 'link' : "%s/youraccount/access%s" % (CFG_SITE_SECURE_URL, make_canonical_urlargd({ 'ln' : ln, 'mailcookie' : address_activation_key }, {})), 'outro' : _("in order to confirm the validity of this request."), 'outro2' : _("Please note that this URL will remain valid for about %(days)s days only.") % {'days' : CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS}, } return out def tmpl_account_emailSent(self, ln, email): """ Displays a confirmation message for an email sent Parameters: - 'ln' *string* - The language to display the interface in - 'email' *string* - The email to which the message has been sent """ # load the right message language _ = gettext_set_language(ln) out ="" out += _("Okay, a password reset link has been emailed to %s.") % email return out def tmpl_account_delete(self, ln): """ Displays a confirmation message about deleting the account Parameters: - 'ln' *string* - The language to display the interface in """ # load the right message language _ = gettext_set_language(ln) out = "

" + _("""Deleting your account""") + '

' return out def tmpl_account_logout(self, ln): """ Displays a confirmation message about logging out Parameters: - 'ln' *string* - The language to display the interface in """ # load the right message language _ = gettext_set_language(ln) out = _("You are no longer recognized by our system.") + ' ' if CFG_EXTERNAL_AUTH_USING_SSO and CFG_EXTERNAL_AUTH_LOGOUT_SSO: out += _("""You are still recognized by the centralized %(x_fmt_open)sSSO%(x_fmt_close)s system. You can %(x_url_open)slogout from SSO%(x_url_close)s, too.""") % \ {'x_fmt_open' : '', 'x_fmt_close' : '', 'x_url_open' : '' % CFG_EXTERNAL_AUTH_LOGOUT_SSO, 'x_url_close' : ''} out += '
' out += _("If you wish you can %(x_url_open)slogin here%(x_url_close)s.") % \ {'x_url_open': '', 'x_url_close': ''} return out def tmpl_login_form(self, ln, referer, internal, register_available, methods, selected_method, msg=None): """ Displays a login form Parameters: - 'ln' *string* - The language to display the interface in - 'referer' *string* - The referer URL - will be redirected upon after login - 'internal' *boolean* - If we are producing an internal authentication - 'register_available' *boolean* - If users can register freely in the system - 'methods' *array* - The available authentication methods - 'selected_method' *string* - The default authentication method - 'msg' *string* - The message to print before the form, if needed """ # load the right message language _ = gettext_set_language(ln) if msg is "": out = "

%(please_login)s

" % { 'please_login' : cgi.escape(_("If you already have an account, please login using the form below.")) } if CFG_CERN_SITE: out += "

" + _("If you don't own a CERN account yet, you can register a %(x_url_open)snew CERN lightweight account%(x_url_close)s.") % {'x_url_open' : '', 'x_url_close' : ''} + "

" else: if register_available: out += "

"+_("If you don't own an account yet, please %(x_url_open)sregister%(x_url_close)s an internal account.") %\ {'x_url_open': '', 'x_url_close': ''} + "

" else: # users cannot register accounts, so advise them # how to get one, or be silent about register # facility if account level is more than 4: if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 5: out += "

" + _("If you don't own an account yet, please contact %s.") % ('%s' % (cgi.escape(CFG_SITE_SUPPORT_EMAIL, True), cgi.escape(CFG_SITE_SUPPORT_EMAIL))) + "

" else: out = "

%s

" % msg out += """
""" if len(methods) > 1: # more than one method, must make a select login_select = """" out += """ """ % { 'login_title' : cgi.escape(_("Login method:")), 'login_select' : login_select, } else: # only one login method available out += """""" % cgi.escape(methods[0], True) out += """
%(login_select)s
""" % { 'ln': cgi.escape(ln, True), 'referer' : cgi.escape(referer, True), 'username' : cgi.escape(_("Username")), 'password' : cgi.escape(_("Password")), 'remember_me' : cgi.escape(_("Remember login on this computer.")), 'login' : cgi.escape(_("login")), } if internal: out += """   (%(lost_pass)s)""" % { 'ln' : cgi.escape(ln, True), 'lost_pass' : cgi.escape(_("Lost your password?")) } out += """
""" out += """

%(note)s: %(note_text)s

""" % { 'note' : cgi.escape(_("Note")), 'note_text': cgi.escape(_("You can use your nickname or your email address to login."))} return out def tmpl_lost_your_password_teaser(self, ln=CFG_SITE_LANG): """Displays a short sentence to attract user to the fact that maybe he lost his password. Used by the registration page. """ _ = gettext_set_language(ln) out = "" out += """%(maybe_lost_pass)s""" % { 'ln' : ln, 'maybe_lost_pass': ("Maybe you have lost your password?") } return out def tmpl_reset_password_form(self, ln, email, reset_key, msg=''): """Display a form to reset the password.""" _ = gettext_set_language(ln) out = "" out = "

%s

" % _("Your request is valid. Please set the new " "desired password in the following form.") if msg: out += """

%s

""" % msg out += """
%(set_password_for)s:%(email)s
""" % { 'ln' : ln, 'reset_key' : reset_key, 'email' : email, 'set_password_for' : _('Set a new password for'), 'type_new_password' : _('Type the new password'), 'type_it_again' : _('Type again the new password'), 'set_new_password' : _('Set the new password') } return out def tmpl_register_page(self, ln, referer, level): """ Displays a login form Parameters: - 'ln' *string* - The language to display the interface in - 'referer' *string* - The referer URL - will be redirected upon after login - 'level' *int* - Login level (0 - all access, 1 - accounts activated, 2+ - no self-registration) """ # load the right message language _ = gettext_set_language(ln) out = "" if level <= 1: out += _("Please enter your email address and desired nickname and password:") if level == 1: out += _("It will not be possible to use the account before it has been verified and activated.") out += """

(%(mandatory)s)

%(example)s: john.doe@example.com

(%(mandatory)s)

%(example)s: johnd

(%(optional)s)

%(note)s: %(password_contain)s

%(note)s: %(explain_acc)s""" % { 'referer' : cgi.escape(referer), 'ln' : cgi.escape(ln), 'email_address' : _("Email address"), 'nickname' : _("Nickname"), 'password' : _("Password"), 'mandatory' : _("mandatory"), 'optional' : _("optional"), 'example' : _("Example"), 'note' : _("Note"), 'password_contain' : _("The password phrase may contain punctuation, spaces, etc."), 'retype' : _("Retype Password"), 'register' : _("register"), 'explain_acc' : _("Please do not use valuable passwords such as your Unix, AFS or NICE passwords with this service. Your email address will stay strictly confidential and will not be disclosed to any third party. It will be used to identify you for personal services of %s. For example, you may set up an automatic alert search that will look for new preprints and will notify you daily of new arrivals by email.") % CFG_SITE_NAME, } else: # level >=2, so users cannot register accounts out += "

" + _("It is not possible to create an account yourself. Contact %s if you want an account.") % ('%s' % (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_SUPPORT_EMAIL)) + "

" return out def tmpl_account_adminactivities(self, ln, uid, guest, roles, activities): """ Displays the admin activities block for this user Parameters: - 'ln' *string* - The language to display the interface in - 'uid' *string* - The used id - 'guest' *boolean* - If the user is guest - 'roles' *array* - The current user roles - 'activities' *array* - The user allowed activities """ # load the right message language _ = gettext_set_language(ln) out = "" # guest condition if guest: return _("You seem to be a guest user. You have to %(x_url_open)slogin%(x_url_close)s first.") % \ {'x_url_open': '', 'x_url_close': ''} # no rights condition if not roles: return "

" + _("You are not authorized to access administrative functions.") + "

" # displaying form out += "

" + _("You are enabled to the following roles: %(x_role)s.") % {'x_role': ('' + ", ".join(roles) + "")} + '

' if activities: # print proposed links: activities.sort(lambda x, y: cmp(x.lower(), y.lower())) tmp_out = '' for action in activities: if action == "runbibedit": tmp_out += """
   
%s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run Record Editor")) if action == "runbibeditmulti": tmp_out += """
    %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run Multi-Record Editor")) if action == "runbibcirculation": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Run BibCirculation")) if action == "runbibmerge": tmp_out += """
    %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run Record Merger")) if action == "runbibswordclient": tmp_out += """
    %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run BibSword Client")) if action == "runbatchuploader": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Run Batch Uploader")) if action == "cfgbibformat": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure BibFormat")) tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure BibKnowledge")) if action == "cfgoaiharvest": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure OAI Harvest")) if action == "cfgoairepository": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure OAI Repository")) if action == "cfgbibindex": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure BibIndex")) if action == "cfgbibrank": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure BibRank")) if action == "cfgwebaccess": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure WebAccess")) if action == "cfgwebcomment": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure WebComment")) if action == "cfgwebjournal": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure WebJournal")) if action == "cfgwebsearch": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure WebSearch")) if action == "cfgwebsubmit": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure WebSubmit")) if action == "runbibdocfile": - tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Run Document File Manager")) + tmp_out += """
    %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, ln, _("Run Document File Manager")) if action == "cfgbibsort": tmp_out += """
    %s""" % (CFG_SITE_URL, ln, _("Configure BibSort")) if tmp_out: out += _("Here are some interesting web admin links for you:") + tmp_out out += "
" + _("For more admin-level activities, see the complete %(x_url_open)sAdmin Area%(x_url_close)s.") %\ {'x_url_open': '', 'x_url_close': ''} return out def tmpl_create_userinfobox(self, ln, url_referer, guest, username, submitter, referee, admin, usebaskets, usemessages, usealerts, usegroups, useloans, usestats): """ Displays the user block Parameters: - 'ln' *string* - The language to display the interface in - 'url_referer' *string* - URL of the page being displayed - 'guest' *boolean* - If the user is guest - 'username' *string* - The username (nickname or email) - 'submitter' *boolean* - If the user is submitter - 'referee' *boolean* - If the user is referee - 'admin' *boolean* - If the user is admin - 'usebaskets' *boolean* - If baskets are enabled for the user - 'usemessages' *boolean* - If messages are enabled for the user - 'usealerts' *boolean* - If alerts are enabled for the user - 'usegroups' *boolean* - If groups are enabled for the user - 'useloans' *boolean* - If loans are enabled for the user - 'usestats' *boolean* - If stats are enabled for the user @note: with the update of CSS classes (cds.cds -> invenio.css), the variables useloans etc are not used in this function, since they are in the menus. But we keep them in the function signature for backwards compatibility. 
""" # load the right message language _ = gettext_set_language(ln) out = """ """ % CFG_SITE_URL if guest: out += """%(guest_msg)s :: %(login)s""" % { 'sitesecureurl': CFG_SITE_SECURE_URL, 'ln' : ln, 'guest_msg' : _("guest"), 'referer' : url_referer and ('&referer=%s' % urllib.quote(url_referer)) or '', 'login' : _('login') } else: out += """ %(username)s :: """ % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'ln' : ln, 'username' : username } out += """%(logout)s""" % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'ln' : ln, 'logout' : _("logout"), } return out def tmpl_create_useractivities_menu(self, ln, selected, url_referer, guest, username, submitter, referee, admin, usebaskets, usemessages, usealerts, usegroups, useloans, usestats): """ Returns the main navigation menu with actions based on user's priviledges @param ln: The language to display the interface in @type ln: string @param selected: If the menu is currently selected @type selected: boolean @param url_referer: URL of the page being displayed @type url_referer: string @param guest: If the user is guest @type guest: string @param username: The username (nickname or email) @type username: string @param submitter: If the user is submitter @type submitter: boolean @param referee: If the user is referee @type referee: boolean @param admin: If the user is admin @type admin: boolean @param usebaskets: If baskets are enabled for the user @type usebaskets: boolean @param usemessages: If messages are enabled for the user @type usemessages: boolean @param usealerts: If alerts are enabled for the user @type usealerts: boolean @param usegroups: If groups are enabled for the user @type usegroups: boolean @param useloans: If loans are enabled for the user @type useloans: boolean @param usestats: If stats are enabled for the user @type usestats: boolean @return: html menu of the user activities @rtype: string """ # load the right message language _ = gettext_set_language(ln) out = '''
%(personalize)s
    ''' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'personalize': _("Personalize"), 'on': selected and " on" or '', 'selected': selected and "selected" or '' } if not guest: out += '
  • %(account)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'account' : _('Your account') } if usealerts or guest: out += '
  • %(alerts)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'alerts' : _('Your alerts') } if referee: out += '
  • %(approvals)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'approvals' : _('Your approvals') } if usebaskets or guest: out += '
  • %(baskets)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'baskets' : _('Your baskets') } if usegroups: out += '
  • %(groups)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'groups' : _('Your groups') } if useloans: out += '
  • %(loans)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'loans' : _('Your loans') } if usemessages: out += '
  • %(messages)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'messages' : _('Your messages') } if submitter: out += '
  • %(submissions)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'submissions' : _('Your submissions') } if usealerts or guest: out += '
  • %(searches)s
  • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'searches' : _('Your searches') } out += '
' return out def tmpl_create_adminactivities_menu(self, ln, selected, url_referer, guest, username, submitter, referee, admin, usebaskets, usemessages, usealerts, usegroups, useloans, usestats, activities): """ Returns the main navigation menu with actions based on user's priviledges @param ln: The language to display the interface in @type ln: string @param selected: If the menu is currently selected @type selected: boolean @param url_referer: URL of the page being displayed @type url_referer: string @param guest: If the user is guest @type guest: string @param username: The username (nickname or email) @type username: string @param submitter: If the user is submitter @type submitter: boolean @param referee: If the user is referee @type referee: boolean @param admin: If the user is admin @type admin: boolean @param usebaskets: If baskets are enabled for the user @type usebaskets: boolean @param usemessages: If messages are enabled for the user @type usemessages: boolean @param usealerts: If alerts are enabled for the user @type usealerts: boolean @param usegroups: If groups are enabled for the user @type usegroups: boolean @param useloans: If loans are enabled for the user @type useloans: boolean @param usestats: If stats are enabled for the user @type usestats: boolean @param activities: dictionary of admin activities @rtype activities: dict @return: html menu of the user activities @rtype: string """ # load the right message language _ = gettext_set_language(ln) out = '' if activities: out += '''
%(admin)s
    ''' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'admin': _("Administration"), 'on': selected and " on" or '', 'selected': selected and "selected" or '' } for name in sorted(activities.iterkeys()): url = activities[name] out += '
  • %(name)s
  • ' % { 'url': url, 'name': name } if usestats: out += """
  • %(stats)s
  • """ % { 'CFG_SITE_URL' : CFG_SITE_URL, 'ln' : ln, 'stats' : _("Statistics"), } out += '
' return out def tmpl_warning(self, warnings, ln=CFG_SITE_LANG): """ Display len(warnings) warning fields @param infos: list of strings @param ln=language @return: html output """ if not((type(warnings) is list) or (type(warnings) is tuple)): warnings = [warnings] warningbox = "" if warnings != []: warningbox = "
\n Warning:\n" for warning in warnings: lines = warning.split("\n") warningbox += "

" for line in lines[0:-1]: warningbox += line + "
\n" warningbox += lines[-1] + "

" warningbox += "

\n" return warningbox def tmpl_error(self, error, ln=CFG_SITE_LANG): """ Display error @param error: string @param ln=language @return: html output """ _ = gettext_set_language(ln) errorbox = "" if error != "": errorbox = "
\n Error:\n" errorbox += "

" errorbox += error + "

" errorbox += "

\n" return errorbox def tmpl_display_all_groups(self, infos, admin_group_html, member_group_html, external_group_html = None, warnings=[], ln=CFG_SITE_LANG): """ Displays the 3 tables of groups: admin, member and external Parameters: - 'ln' *string* - The language to display the interface in - 'admin_group_html' *string* - HTML code for displaying all the groups the user is the administrator of - 'member_group_html' *string* - HTML code for displaying all the groups the user is member of - 'external_group_html' *string* - HTML code for displaying all the external groups the user is member of """ _ = gettext_set_language(ln) group_text = self.tmpl_infobox(infos) group_text += self.tmpl_warning(warnings) if external_group_html: group_text += """
%s

%s

%s
""" %(admin_group_html, member_group_html, external_group_html) else: group_text += """
%s

%s
""" %(admin_group_html, member_group_html) return group_text def tmpl_display_admin_groups(self, groups, ln=CFG_SITE_LANG): """ Display the groups the user is admin of. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - All the group the user is admin of - 'infos' *list* - Display infos on top of admin group table """ _ = gettext_set_language(ln) img_link = """ %(text)s
%(text)s
""" out = self.tmpl_group_table_title(img="/img/group_admin.png", text=_("You are an administrator of the following groups:") ) out += """ """ %(_("Group"), _("Description")) if len(groups) == 0: out += """ """ %(_("You are not an administrator of any groups."),) for group_data in groups: (grpID, name, description) = group_data edit_link = img_link % {'siteurl' : CFG_SITE_URL, 'grpID' : grpID, 'ln': ln, 'img':"webbasket_create_small.png", 'text':_("Edit group"), 'action':"edit" } members_link = img_link % {'siteurl' : CFG_SITE_URL, 'grpID' : grpID, 'ln': ln, 'img':"webbasket_usergroup.png", 'text':_("Edit %s members") % '', 'action':"members" } out += """ """ % (cgi.escape(name), cgi.escape(description), edit_link, members_link) out += """
%s %s    
%s
%s %s %s %s
     
""" % {'ln': ln, 'write_label': _("Create new group"), } return out def tmpl_display_member_groups(self, groups, ln=CFG_SITE_LANG): """ Display the groups the user is member of. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - All the group the user is member of """ _ = gettext_set_language(ln) group_text = self.tmpl_group_table_title(img="/img/webbasket_us.png", text=_("You are a member of the following groups:")) group_text += """ """ % (_("Group"), _("Description")) if len(groups) == 0: group_text += """ """ %(_("You are not a member of any groups."),) for group_data in groups: (id, name, description) = group_data group_text += """ """ % (cgi.escape(name), cgi.escape(description)) group_text += """
%s %s
%s
%s %s
""" % {'ln': ln, 'join_label': _("Join new group"), 'leave_label':_("Leave group") } return group_text def tmpl_display_external_groups(self, groups, ln=CFG_SITE_LANG): """ Display the external groups the user is member of. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - All the group the user is member of """ _ = gettext_set_language(ln) group_text = self.tmpl_group_table_title(img="/img/webbasket_us.png", text=_("You are a member of the following external groups:")) group_text += """ """ % (_("Group"), _("Description")) if len(groups) == 0: group_text += """ """ %(_("You are not a member of any external groups."),) for group_data in groups: (id, name, description) = group_data group_text += """ """ % (cgi.escape(name), cgi.escape(description)) group_text += """
%s %s
%s
%s %s
""" return group_text def tmpl_display_input_group_info(self, group_name, group_description, join_policy, act_type="create", grpID=None, warnings=[], ln=CFG_SITE_LANG): """ Display group data when creating or updating a group: Name, description, join_policy. Parameters: - 'ln' *string* - The language to display the interface in - 'group_name' *string* - name of the group - 'group_description' *string* - description of the group - 'join_policy' *string* - join policy - 'act_type' *string* - info about action : create or edit(update) - 'grpID' *int* - ID of the group(not None in case of group editing) - 'warnings' *list* - Display warning if values are not correct """ _ = gettext_set_language(ln) #default hidden_id ="" form_name = "create_group" action = CFG_SITE_URL + '/yourgroups/create' button_label = _("Create new group") button_name = "create_button" label = _("Create new group") delete_text = "" if act_type == "update": form_name = "update_group" action = CFG_SITE_URL + '/yourgroups/edit' button_label = _("Update group") button_name = "update" label = _('Edit group %s') % cgi.escape(group_name) delete_text = """""" delete_text %= (_("Delete group"),"delete") if grpID is not None: hidden_id = """""" hidden_id %= grpID out = self.tmpl_warning(warnings) out += """
%(label)s %(label)s
%(join_policy_label)s %(join_policy)s
%(hidden_id)s
%(delete_text)s
""" out %= {'action' : action, 'logo': CFG_SITE_URL + '/img/webbasket_create.png', 'label': label, 'form_name' : form_name, 'name_label': _("Group name:"), 'delete_text': delete_text, 'description_label': _("Group description:"), 'join_policy_label': _("Group join policy:"), 'group_name': cgi.escape(group_name, 1), 'group_description': cgi.escape(group_description, 1), 'button_label': button_label, 'button_name':button_name, 'cancel_label':_("Cancel"), 'hidden_id':hidden_id, 'ln': ln, 'join_policy' :self.__create_join_policy_selection_menu("join_policy", join_policy, ln) } return out def tmpl_display_input_join_group(self, group_list, group_name, group_from_search, search, warnings=[], ln=CFG_SITE_LANG): """ Display the groups the user can join. He can use default select list or the search box Parameters: - 'ln' *string* - The language to display the interface in - 'group_list' *list* - All the group the user can join - 'group_name' *string* - Name of the group the user is looking for - 'group_from search' *list* - List of the group the user can join matching group_name - 'search' *int* - User is looking for group using group_name - 'warnings' *list* - Display warning if two group are selected """ _ = gettext_set_language(ln) out = self.tmpl_warning(warnings) search_content = "" if search: search_content = """ """ if group_from_search != []: search_content += self.__create_select_menu('grpID', group_from_search, _("Please select:")) else: search_content += _("No matching group") search_content += """ """ out += """
%(label)s %(label)s
%(search_content)s
%(list_label)s %(group_list)s  



""" out %= {'action' : CFG_SITE_URL + '/yourgroups/join', 'logo': CFG_SITE_URL + '/img/webbasket_create.png', 'label': _("Join group"), 'group_name': cgi.escape(group_name, 1), 'label2':_("or find it") + ': ', 'list_label':_("Choose group:"), 'ln': ln, 'find_label': _("Find group"), 'cancel_label':_("Cancel"), 'group_list' :self.__create_select_menu("grpID",group_list, _("Please select:")), 'search_content' : search_content } return out def tmpl_display_manage_member(self, grpID, group_name, members, pending_members, infos=[], warnings=[], ln=CFG_SITE_LANG): """Display current members and waiting members of a group. Parameters: - 'ln' *string* - The language to display the interface in - 'grpID *int* - ID of the group - 'group_name' *string* - Name of the group - 'members' *list* - List of the current members - 'pending_members' *list* - List of the waiting members - 'infos' *tuple of 2 lists* - Message to inform user about his last action - 'warnings' *list* - Display warning if two group are selected """ _ = gettext_set_language(ln) out = self.tmpl_warning(warnings) out += self.tmpl_infobox(infos) out += """

%(title)s

%(img_alt_header1)s %(header1)s
 
%(member_text)s
%(img_alt_header2)s %(header2)s
 
%(pending_text)s
%(img_alt_header3)s %(header3)s
 
%(invite_text)s
""" if members : member_list = self.__create_select_menu("member_id", members, _("Please select:")) member_text = """ %s """ % (member_list,_("Remove member")) else : member_text = """%s""" % _("No members.") if pending_members : pending_list = self.__create_select_menu("pending_member_id", pending_members, _("Please select:")) pending_text = """ %s """ % (pending_list,_("Accept member"), _("Reject member")) else : pending_text = """%s""" % _("No members awaiting approval.") header1 = self.tmpl_group_table_title(text=_("Current members")) header2 = self.tmpl_group_table_title(text=_("Members awaiting approval")) header3 = _("Invite new members") write_a_message_url = create_url( "%s/yourmessages/write" % CFG_SITE_URL, { 'ln' : ln, 'msg_subject' : _('Invitation to join "%s" group' % escape_html(group_name)), 'msg_body' : _("""\ Hello: I think you might be interested in joining the group "%(x_name)s". You can join by clicking here: %(x_url)s. Best regards. """) % {'x_name': group_name, 'x_url': create_html_link("%s/yourgroups/join" % CFG_SITE_URL, { 'grpID' : grpID, 'join_button' : "1", }, link_label=group_name, escape_urlargd=True, escape_linkattrd=True)}}) link_open = '' % escape_html(write_a_message_url) invite_text = _("If you want to invite new members to join your group, please use the %(x_url_open)sweb message%(x_url_close)s system.") % \ {'x_url_open': link_open, 'x_url_close': ''} action = CFG_SITE_URL + '/yourgroups/members?ln=' + ln out %= {'title':_('Group: %s') % escape_html(group_name), 'member_text' : member_text, 'pending_text' :pending_text, 'action':action, 'grpID':grpID, 'header1': header1, 'header2': header2, 'header3': header3, 'img_alt_header1': _("Current members"), 'img_alt_header2': _("Members awaiting approval"), 'img_alt_header3': _("Invite new members"), 'invite_text': invite_text, 'imgurl': CFG_SITE_URL + '/img', 'cancel_label':_("Cancel"), 'ln':ln } return out def tmpl_display_input_leave_group(self, groups, warnings=[], 
ln=CFG_SITE_LANG): """Display groups the user can leave. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - List of groups the user is currently member of - 'warnings' *list* - Display warning if no group is selected """ _ = gettext_set_language(ln) out = self.tmpl_warning(warnings) out += """
%(label)s %(label)s
%(list_label)s %(groups)s  
%(submit)s
""" if groups: groups = self.__create_select_menu("grpID", groups, _("Please select:")) list_label = _("Group list") submit = """""" % _("Leave group") else : groups = _("You are not member of any group.") list_label = "" submit = "" action = CFG_SITE_URL + '/yourgroups/leave?ln=%s' action %= (ln) out %= {'groups' : groups, 'list_label' : list_label, 'action':action, 'logo': CFG_SITE_URL + '/img/webbasket_create.png', 'label' : _("Leave group"), 'cancel_label':_("Cancel"), 'ln' :ln, 'submit' : submit } return out def tmpl_confirm_delete(self, grpID, ln=CFG_SITE_LANG): """ display a confirm message when deleting a group @param grpID *int* - ID of the group @param ln: language @return: html output """ _ = gettext_set_language(ln) action = CFG_SITE_URL + '/yourgroups/edit' out = """
%(message)s
"""% {'message': _("Are you sure you want to delete this group?"), 'ln':ln, 'yes_label': _("Yes"), 'no_label': _("No"), 'grpID':grpID, 'action': action } return out def tmpl_confirm_leave(self, uid, grpID, ln=CFG_SITE_LANG): """ display a confirm message @param grpID *int* - ID of the group @param ln: language @return: html output """ _ = gettext_set_language(ln) action = CFG_SITE_URL + '/yourgroups/leave' out = """
%(message)s
"""% {'message': _("Are you sure you want to leave this group?"), 'ln':ln, 'yes_label': _("Yes"), 'no_label': _("No"), 'grpID':grpID, 'action': action } return out def __create_join_policy_selection_menu(self, name, current_join_policy, ln=CFG_SITE_LANG): """Private function. create a drop down menu for selection of join policy @param current_join_policy: join policy as defined in CFG_WEBSESSION_GROUP_JOIN_POLICY @param ln: language """ _ = gettext_set_language(ln) elements = [(CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEOPEN'], _("Visible and open for new members")), (CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEMAIL'], _("Visible but new members need approval")) ] select_text = _("Please select:") return self.__create_select_menu(name, elements, select_text, selected_key=current_join_policy) def __create_select_menu(self, name, elements, select_text, multiple=0, selected_key=None): """ private function, returns a popup menu @param name: name of HTML control @param elements: list of (key, value) """ if multiple : out = """ """ % name out += '' % (select_text) for (key, label) in elements: selected = '' if key == selected_key: selected = ' selected="selected"' out += ''% (key, selected, label) out += '' return out def tmpl_infobox(self, infos, ln=CFG_SITE_LANG): """Display len(infos) information fields @param infos: list of strings @param ln=language @return: html output """ _ = gettext_set_language(ln) if not((type(infos) is list) or (type(infos) is tuple)): infos = [infos] infobox = "" for info in infos: infobox += '
' lines = info.split("\n") for line in lines[0:-1]: infobox += line + "
\n" infobox += lines[-1] + "
\n" return infobox def tmpl_navtrail(self, ln=CFG_SITE_LANG, title=""): """ display the navtrail, e.g.: Your account > Your group > title @param title: the last part of the navtrail. Is not a link @param ln: language return html formatted navtrail """ _ = gettext_set_language(ln) nav_h1 = '%s' nav_h2 = "" if (title != ""): nav_h2 = ' > %s' nav_h2 = nav_h2 % (CFG_SITE_URL, _("Your Groups")) return nav_h1 % (CFG_SITE_URL, _("Your Account")) + nav_h2 def tmpl_group_table_title(self, img="", text="", ln=CFG_SITE_LANG): """ display the title of a table: - 'img' *string* - img path - 'text' *string* - title - 'ln' *string* - The language to display the interface in """ out = "
" if img: out += """ """ % (CFG_SITE_URL + img) out += """ %s
""" % text return out def tmpl_admin_msg(self, group_name, grpID, ln=CFG_SITE_LANG): """ return message content for joining group - 'group_name' *string* - name of the group - 'grpID' *int* - ID of the group - 'ln' *string* - The language to display the interface in """ _ = gettext_set_language(ln) subject = _("Group %s: New membership request") % group_name url = CFG_SITE_URL + "/yourgroups/members?grpID=%s&ln=%s" url %= (grpID, ln) # FIXME: which user? We should show his nickname. body = (_("A user wants to join the group %s.") % group_name) + '
' body += _("Please %(x_url_open)saccept or reject%(x_url_close)s this user's request.") % {'x_url_open': '', 'x_url_close': ''} body += '
' return subject, body def tmpl_member_msg(self, group_name, accepted=0, ln=CFG_SITE_LANG): """ return message content when new member is accepted/rejected - 'group_name' *string* - name of the group - 'accepted' *int* - 1 if new membership has been accepted, 0 if it has been rejected - 'ln' *string* - The language to display the interface in """ _ = gettext_set_language(ln) if accepted: subject = _("Group %s: Join request has been accepted") % (group_name) body = _("Your request for joining group %s has been accepted.") % (group_name) else: subject = _("Group %s: Join request has been rejected") % (group_name) body = _("Your request for joining group %s has been rejected.") % (group_name) url = CFG_SITE_URL + "/yourgroups/display?ln=" + ln body += '
' body += _("You can consult the list of %(x_url_open)syour groups%(x_url_close)s.") % {'x_url_open': '', 'x_url_close': ''} body += '
' return subject, body def tmpl_delete_msg(self, group_name, ln=CFG_SITE_LANG): """ return message content when new member is accepted/rejected - 'group_name' *string* - name of the group - 'ln' *string* - The language to display the interface in """ _ = gettext_set_language(ln) subject = _("Group %s has been deleted") % group_name url = CFG_SITE_URL + "/yourgroups/display?ln=" + ln body = _("Group %s has been deleted by its administrator.") % group_name body += '
' body += _("You can consult the list of %(x_url_open)syour groups%(x_url_close)s.") % {'x_url_open': '', 'x_url_close': ''} body += '
' return subject, body def tmpl_group_info(self, nb_admin_groups=0, nb_member_groups=0, nb_total_groups=0, ln=CFG_SITE_LANG): """ display infos about groups (used by myaccount.py) @param nb_admin_group: number of groups the user is admin of @param nb_member_group: number of groups the user is member of @param total_group: number of groups the user belongs to @param ln: language return: html output. """ _ = gettext_set_language(ln) out = _("You can consult the list of %(x_url_open)s%(x_nb_total)i groups%(x_url_close)s you are subscribed to (%(x_nb_member)i) or administering (%(x_nb_admin)i).") out %= {'x_url_open': '', 'x_nb_total': nb_total_groups, 'x_url_close': '', 'x_nb_admin': nb_admin_groups, 'x_nb_member': nb_member_groups} return out def tmpl_general_warnings(self, warning_list, ln=CFG_SITE_LANG): """ display information to the admin user about possible ssecurity problems in the system. """ message = "" _ = gettext_set_language(ln) #Try and connect to the mysql database with the default invenio password if "warning_mysql_password_equal_to_invenio_password" in warning_list: message += "

" message += _("Warning: The password set for MySQL root user is the same as the default Invenio password. For security purposes, you may want to change the password.") message += "

" #Try and connect to the invenio database with the default invenio password if "warning_invenio_password_equal_to_default" in warning_list: message += "

" message += _("Warning: The password set for the Invenio MySQL user is the same as the shipped default. For security purposes, you may want to change the password.") message += "

" #Check if the admin password is empty if "warning_empty_admin_password" in warning_list: message += "

" message += _("Warning: The password set for the Invenio admin user is currently empty. For security purposes, it is strongly recommended that you add a password.") message += "

" #Check if the admin email has been changed from the default if "warning_site_support_email_equal_to_default" in warning_list: message += "

" message += _("Warning: The email address set for support email is currently set to info@invenio-software.org. It is recommended that you change this to your own address.") message += "

" #Check for a new release if "note_new_release_available" in warning_list: message += "

" message += _("A newer version of Invenio is available for download. You may want to visit ") message += "http://invenio-software.org/wiki/Installation/Download" message += "

" #Error downloading release notes if "error_cannot_download_release_notes" in warning_list: message += "

" message += _("Cannot download or parse release notes from http://invenio-software.org/repo/invenio/tree/RELEASE-NOTES") message += "

" return message diff --git a/modules/webstyle/lib/webinterface_layout.py b/modules/webstyle/lib/webinterface_layout.py index 8a57ed14d..a9e4a5a6e 100644 --- a/modules/webstyle/lib/webinterface_layout.py +++ b/modules/webstyle/lib/webinterface_layout.py @@ -1,300 +1,297 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Global organisation of the application's URLs. This module binds together Invenio's modules and maps them to their corresponding URLs (ie, /search to the websearch modules,...) """ -__revision__ = \ - "$Id$" - from invenio.webinterface_handler import create_handler from invenio.errorlib import register_exception from invenio.webinterface_handler import WebInterfaceDirectory from invenio import webinterface_handler_config as apache from invenio.config import CFG_DEVEL_SITE, CFG_OPENAIRE_SITE class WebInterfaceDumbPages(WebInterfaceDirectory): """This class implements a dumb interface to use as a fallback in case of errors importing particular module pages.""" _exports = [''] def __call__(self, req, form): try: from invenio.webpage import page except ImportError: page = lambda * args: args[1] req.status = apache.HTTP_INTERNAL_SERVER_ERROR msg = "

This functionality is experiencing a temporary failure.

" msg += "

The administrator has been informed about the problem.

" try: from invenio.config import CFG_SITE_ADMIN_EMAIL msg += """

You can contact %s in case of questions.

""" % \ CFG_SITE_ADMIN_EMAIL except ImportError: pass msg += """

We hope to restore the service soon.

Sorry for the inconvenience.

""" try: return page('Service failure', msg) except: return msg def _lookup(self, component, path): return WebInterfaceDumbPages(), path index = __call__ try: from invenio.websearch_webinterface import WebInterfaceSearchInterfacePages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceSearchInterfacePages = WebInterfaceDumbPages try: from invenio.websearch_webinterface import WebInterfaceRSSFeedServicePages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceRSSFeedServicePages = WebInterfaceDumbPages try: from invenio.websearch_webinterface import WebInterfaceUnAPIPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceUnAPIPages = WebInterfaceDumbPages try: - from invenio.websubmit_webinterface import websubmit_legacy_getfile + from invenio.bibdocfile_webinterface import bibdocfile_legacy_getfile except: register_exception(alert_admin=True, subject='EMERGENCY') - websubmit_legacy_getfile = WebInterfaceDumbPages + bibdocfile_legacy_getfile = WebInterfaceDumbPages try: from invenio.websubmit_webinterface import WebInterfaceSubmitPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceSubmitPages = WebInterfaceDumbPages try: from invenio.websession_webinterface import WebInterfaceYourAccountPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceYourAccountPages = WebInterfaceDumbPages try: from invenio.websession_webinterface import WebInterfaceYourTicketsPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceYourTicketsPages = WebInterfaceDumbPages try: from invenio.websession_webinterface import WebInterfaceYourGroupsPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceYourGroupsPages = WebInterfaceDumbPages try: from invenio.webalert_webinterface import WebInterfaceYourAlertsPages except: register_exception(alert_admin=True, subject='EMERGENCY') 
WebInterfaceYourAlertsPages = WebInterfaceDumbPages try: from invenio.webbasket_webinterface import WebInterfaceYourBasketsPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceYourBasketsPages = WebInterfaceDumbPages try: from invenio.webcomment_webinterface import WebInterfaceCommentsPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceCommentsPages = WebInterfaceDumbPages try: from invenio.webmessage_webinterface import WebInterfaceYourMessagesPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceYourMessagesPages = WebInterfaceDumbPages try: from invenio.errorlib_webinterface import WebInterfaceErrorPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceErrorPages = WebInterfaceDumbPages try: from invenio.oai_repository_webinterface import WebInterfaceOAIProviderPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceOAIProviderPages = WebInterfaceDumbPages try: from invenio.webstat_webinterface import WebInterfaceStatsPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceStatsPages = WebInterfaceDumbPages try: from invenio.bibcirculation_webinterface import WebInterfaceYourLoansPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceYourLoansPages = WebInterfaceDumbPages try: from invenio.webjournal_webinterface import WebInterfaceJournalPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceJournalPages = WebInterfaceDumbPages try: from invenio.webdoc_webinterface import WebInterfaceDocumentationPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceDocumentationPages = WebInterfaceDumbPages try: from invenio.bibexport_method_fieldexporter_webinterface import \ WebInterfaceFieldExporterPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceFieldExporterPages = 
WebInterfaceDumbPages try: from invenio.bibknowledge_webinterface import WebInterfaceBibKnowledgePages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceBibKnowledgePages = WebInterfaceDumbPages try: from invenio.batchuploader_webinterface import \ WebInterfaceBatchUploaderPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceBatchUploaderPages = WebInterfaceDumbPages try: from invenio.bibsword_webinterface import \ WebInterfaceSword except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceSword = WebInterfaceDumbPages try: from invenio.bibauthorid_webinterface import WebInterfaceBibAuthorIDPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceBibAuthorIDPages = WebInterfaceDumbPages try: from invenio.webauthorprofile_webinterface import WebAuthorPages WebInterfaceWebAuthorPages = WebAuthorPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceWebAuthorPages = WebInterfaceDumbPages if CFG_OPENAIRE_SITE: try: from invenio.openaire_deposit_webinterface import \ WebInterfaceOpenAIREDepositPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceOpenAIREDepositPages = WebInterfaceDumbPages openaire_exports = ['deposit'] else: openaire_exports = [] if CFG_DEVEL_SITE: try: from invenio.httptest_webinterface import WebInterfaceHTTPTestPages except: register_exception(alert_admin=True, subject='EMERGENCY') WebInterfaceHTTPTestPages = WebInterfaceDumbPages test_exports = ['httptest'] else: test_exports = [] class WebInterfaceInvenio(WebInterfaceSearchInterfacePages): """ The global URL layout is composed of the search API plus all the other modules.""" _exports = WebInterfaceSearchInterfacePages._exports + \ [ 'youraccount', 'youralerts', 'yourbaskets', 'yourmessages', 'yourloans', 'yourgroups', 'yourtickets', 'comments', 'error', 'oai2d', ('oai2d.py', 'oai2d'), ('getfile.py', 'getfile'), 'submit', 'rss', 
'stats', 'journal', 'help', 'unapi', 'exporter', 'kb', 'batchuploader', 'person', 'bibsword', 'author' ] + test_exports + openaire_exports def __init__(self): - self.getfile = websubmit_legacy_getfile + self.getfile = bibdocfile_legacy_getfile if CFG_DEVEL_SITE: self.httptest = WebInterfaceHTTPTestPages() if CFG_OPENAIRE_SITE: self.deposit = WebInterfaceOpenAIREDepositPages() submit = WebInterfaceSubmitPages() youraccount = WebInterfaceYourAccountPages() youralerts = WebInterfaceYourAlertsPages() yourbaskets = WebInterfaceYourBasketsPages() yourmessages = WebInterfaceYourMessagesPages() yourloans = WebInterfaceYourLoansPages() yourgroups = WebInterfaceYourGroupsPages() yourtickets = WebInterfaceYourTicketsPages() comments = WebInterfaceCommentsPages() error = WebInterfaceErrorPages() oai2d = WebInterfaceOAIProviderPages() rss = WebInterfaceRSSFeedServicePages() stats = WebInterfaceStatsPages() journal = WebInterfaceJournalPages() help = WebInterfaceDocumentationPages() unapi = WebInterfaceUnAPIPages() exporter = WebInterfaceFieldExporterPages() kb = WebInterfaceBibKnowledgePages() batchuploader = WebInterfaceBatchUploaderPages() bibsword = WebInterfaceSword() person = WebInterfaceBibAuthorIDPages() #redirects author to the new webauthor author = WebInterfaceWebAuthorPages() #author = WebInterfaceAuthorPages() # This creates the 'handler' function, which will be invoked directly # by mod_python. invenio_handler = create_handler(WebInterfaceInvenio()) diff --git a/modules/websubmit/doc/hacking/Makefile.am b/modules/websubmit/doc/hacking/Makefile.am index 1512aaef4..adbec6735 100644 --- a/modules/websubmit/doc/hacking/Makefile.am +++ b/modules/websubmit/doc/hacking/Makefile.am @@ -1,29 +1,28 @@ ## This file is part of Invenio. ## Copyright (C) 2004, 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. webdoclibdir = $(libdir)/webdoc/invenio/hacking -webdoclib_DATA = bibdocfile-api.webdoc \ - websubmit-internals.webdoc \ +webdoclib_DATA = websubmit-internals.webdoc \ websubmit-file-converter.webdoc \ websubmit-file-stamper.webdoc \ websubmit-icon-creator.webdoc \ websubmit-file-metadata.webdoc EXTRA_DIST = $(webdoclib_DATA) CLEANFILES = *~ *.tmp diff --git a/modules/websubmit/doc/hacking/websubmit-internals.webdoc b/modules/websubmit/doc/hacking/websubmit-internals.webdoc index 82388c334..10915b921 100644 --- a/modules/websubmit/doc/hacking/websubmit-internals.webdoc +++ b/modules/websubmit/doc/hacking/websubmit-internals.webdoc @@ -1,42 +1,39 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. This page summarizes all the information suitable to dig inside the WebSubmit internals.
-
BibDocFile API
-
Explains the fulltext document management library.
-
Conversion tools
Explains how to convert from a file format to an other, and how to perform OCR.
Stamping fulltextes
Explains how to stamp fulltextes.
Icon creation tools
Explains how to create icons from fulltextes.
diff --git a/modules/websubmit/lib/Makefile.am b/modules/websubmit/lib/Makefile.am index de30503d2..d89b61afc 100644 --- a/modules/websubmit/lib/Makefile.am +++ b/modules/websubmit/lib/Makefile.am @@ -1,53 +1,47 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
SUBDIRS = functions pylibdir = $(libdir)/python/invenio -pylib_DATA = websubmit_config.py websubmit_engine.py file.py \ +pylib_DATA = websubmit_config.py websubmit_engine.py \ websubmit_dblayer.py \ websubmit_webinterface.py \ websubmit_templates.py \ websubmit_regression_tests.py \ websubmitadmin_config.py \ websubmitadmin_dblayer.py \ websubmitadmin_engine.py \ websubmitadmin_templates.py \ websubmitadmin_regression_tests.py \ websubmit_file_stamper.py \ websubmit_icon_creator.py \ websubmit_file_converter.py \ - websubmit_managedocfiles.py \ - bibdocfile.py \ - bibdocfilecli.py \ - bibdocfile_regression_tests.py \ hocrlib.py \ websubmit_file_metadata.py \ websubmit_web_tests.py metadataplugindir = $(libdir)/python/invenio/websubmit_file_metadata_plugins metadataplugin_DATA = __init__.py \ wsm_extractor_plugin.py \ wsm_pyexiv2_plugin.py \ wsm_pdftk_plugin.py -noinst_DATA = fulltext_files_migration_kit.py icon_migration_kit.py - -EXTRA_DIST = $(pylib_DATA) $(metadataplugin_DATA) $(noinst_DATA) +EXTRA_DIST = $(pylib_DATA) $(metadataplugin_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/websubmit/lib/functions/Create_Upload_Files_Interface.py b/modules/websubmit/lib/functions/Create_Upload_Files_Interface.py index eb023ccbf..8eedb92a8 100644 --- a/modules/websubmit/lib/functions/Create_Upload_Files_Interface.py +++ b/modules/websubmit/lib/functions/Create_Upload_Files_Interface.py @@ -1,500 +1,500 @@ ## $Id: Revise_Files.py,v 1.37 2009/03/26 15:11:05 jerome Exp $ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSubmit function - Displays a generic interface to upload, delete and revise files. To be used on par with Move_Uploaded_Files_to_Storage function: - Create_Upload_Files_Interface records the actions performed by user. - Move_Uploaded_Files_to_Storage execute the recorded actions. NOTE: ===== - Due to the way WebSubmit works, this function can only work when positionned at step 1 in WebSubmit admin, and Move_Uploaded_Files_to_Storage is at step 2 FIXME: ====== - One issue: if we allow deletion or renaming, we might lose track of a bibdoc: someone adds X, renames X->Y, and adds again another file with name X: when executing actions, we will add the second X, and rename it to Y -> need to go back in previous action when renaming... or check that name has never been used.. """ __revision__ = "$Id$" import os from invenio.config import \ CFG_SITE_LANG from invenio.messages import gettext_set_language, wash_language -from invenio.websubmit_managedocfiles import create_file_upload_interface +from invenio.bibdocfile_managedocfiles import create_file_upload_interface def Create_Upload_Files_Interface(parameters, curdir, form, user_info=None): """ List files for revisions. You should use Move_Uploaded_Files_to_Storage.py function in your submission to apply the changes performed by users with this interface. 
@param parameters:(dictionary) - must contain: + maxsize: the max size allowed for uploaded files + minsize: the max size allowed for uploaded files + doctypes: the list of doctypes (like 'Main' or 'Additional') and their description that users can choose from when adding new files. - When no value is provided, users cannot add new file (they can only revise/delete/add format) - When a single value is given, it is used as default doctype for all new documents Eg: main=Main document|additional=Figure, schema. etc ('=' separates doctype and description '|' separates each doctype/description group) + restrictions: the list of restrictions (like 'Restricted' or 'No Restriction') and their description that users can choose from when adding/revising files. Restrictions can then be configured at the level of WebAccess. - When no value is provided, no restriction is applied - When a single value is given, it is used as default resctriction for all documents. - The first value of the list is used as default restriction if the user if not given the choice of the restriction. CHOOSE THE ORDER! Eg: =No restriction|restr=Restricted ('=' separates restriction and description '|' separates each restriction/description group) + canDeleteDoctypes: the list of doctypes that users are allowed to delete. Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canReviseDoctypes: the list of doctypes that users are allowed to revise Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canDescribeDoctypes: the list of doctypes that users are allowed to describe Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canCommentDoctypes: the list of doctypes that users are allowed to comment Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canKeepDoctypes: the list of doctypes for which users can choose to keep previous versions visible when revising a file (i.e. 'Keep previous version' checkbox). 
See also parameter 'keepDefault'. Note that this parameter is ~ignored when revising the attributes of a file (comment, description) without uploading a new file. See also parameter Move_Uploaded_Files_to_Storage.forceFileRevision Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canAddFormatDoctypes: the list of doctypes for which users can add new formats. If there is no value, then no 'add format' link nor warning about losing old formats are displayed. Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canRestrictDoctypes: the list of doctypes for which users can choose the access restrictions when adding or revising a file. If no value is given: - no restriction is applied if none is defined in the 'restrictions' parameter. - else the *first* value of the 'restrictions' parameter is used as default restriction. Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canRenameDoctypes: the list of doctypes that users are allowed to rename (when revising) Eg: Main|Additional ('|' separated values) Use '*' for all doctypes + canNameNewFiles: if user can choose the name of the files they upload (1) or not (0) + defaultFilenameDoctypes: Rename uploaded files to admin-chosen values. List here the the files in current submission directory that contain the names to use for each doctype. Eg: Main=RN|Additional=additional_filename ('=' separates doctype and file in curdir '|' separates each doctype/file group). If the same doctype is submitted several times, a"-%i" suffix is added to the name defined in the file. The default filenames are overriden by user-chosen names if you allow 'canNameNewFiles' or 'canRenameDoctypes'. + maxFilesDoctypes: the maximum number of files that users can upload for each doctype. Eg: Main=1|Additional=2 ('|' separated values) Do not specify the doctype here to have an unlimited number of files for a given doctype. 
+ createRelatedFormats: if uploaded files get converted to whatever format we can (1) or not (0) + keepDefault: the default behaviour for keeping or not previous version of files when users cannot choose (no value in canKeepDoctypes): keep (1) or not (0) Note that this parameter is ignored when revising the attributes of a file (comment, description) without uploading a new file. See also parameter Move_Uploaded_Files_to_Storage.forceFileRevision + showLinks: if we display links to files (1) when possible or not (0) + fileLabel: the label for the file field + filenameLabel: the label for the file name field + descriptionLabel: the label for the description field + commentLabel: the label for the comments field + restrictionLabel: the label in front of the restrictions list + startDoc: the name of a file in curdir that contains some text/markup to be printed *before* the file revision box + endDoc: the name of a file in curdir that contains some text/markup to be printed *after* the file revision box """ global sysno ln = wash_language(form['ln']) _ = gettext_set_language(ln) out = '' ## Fetch parameters defined for this function (minsize, maxsize, doctypes_and_desc, doctypes, can_delete_doctypes, can_revise_doctypes, can_describe_doctypes, can_comment_doctypes, can_keep_doctypes, can_rename_doctypes, can_add_format_to_doctypes, createRelatedFormats_p, can_name_new_files, keep_default, show_links, file_label, filename_label, description_label, comment_label, startDoc, endDoc, restrictions_and_desc, can_restrict_doctypes, restriction_label, doctypes_to_default_filename, max_files_for_doctype) = \ wash_function_parameters(parameters, curdir, ln) try: recid = int(sysno) except: recid = None out += '
' out += startDoc out += create_file_upload_interface(recid, form=form, print_outside_form_tag=True, print_envelope=True, include_headers=True, ln=ln, minsize=minsize, maxsize=maxsize, doctypes_and_desc=doctypes_and_desc, can_delete_doctypes=can_delete_doctypes, can_revise_doctypes=can_revise_doctypes, can_describe_doctypes=can_describe_doctypes, can_comment_doctypes=can_comment_doctypes, can_keep_doctypes=can_keep_doctypes, can_rename_doctypes=can_rename_doctypes, can_add_format_to_doctypes=can_add_format_to_doctypes, create_related_formats=createRelatedFormats_p, can_name_new_files=can_name_new_files, keep_default=keep_default, show_links=show_links, file_label=file_label, filename_label=filename_label, description_label=description_label, comment_label=comment_label, restrictions_and_desc=restrictions_and_desc, can_restrict_doctypes=can_restrict_doctypes, restriction_label=restriction_label, doctypes_to_default_filename=doctypes_to_default_filename, max_files_for_doctype=max_files_for_doctype, sbm_indir=None, sbm_doctype=None, sbm_access=None, uid=None, sbm_curdir=curdir)[1] out += endDoc out += '
' return out def wash_function_parameters(parameters, curdir, ln=CFG_SITE_LANG): """ Returns the functions (admin-defined) parameters washed and initialized properly, as a tuple: Parameters: check Create_Upload_Files_Interface(..) docstring Returns: tuple (minsize, maxsize, doctypes_and_desc, doctypes, can_delete_doctypes, can_revise_doctypes, can_describe_doctypes can_comment_doctypes, can_keep_doctypes, can_rename_doctypes, can_add_format_to_doctypes, createRelatedFormats_p, can_name_new_files, keep_default, show_links, file_label, filename_label, description_label, comment_label, startDoc, endDoc, access_restrictions_and_desc, can_restrict_doctypes, restriction_label, doctypes_to_default_filename, max_files_for_doctype) """ _ = gettext_set_language(ln) # The min and max files sizes that users can upload minsize = parameters['minsize'] maxsize = parameters['maxsize'] # The list of doctypes + description that users can select when # adding new files. If there are no values, then user cannot add # new files. '|' is used to separate doctypes groups, and '=' to # separate doctype and description. Eg: # main=Main document|additional=Figure, schema. 
etc doctypes_and_desc = [doctype.strip().split("=") for doctype \ in parameters['doctypes'].split('|') \ if doctype.strip() != ''] doctypes = [doctype for (doctype, desc) in doctypes_and_desc] doctypes_and_desc = [[doctype, _(desc)] for \ (doctype, desc) in doctypes_and_desc] # The list of doctypes users are allowed to delete # (list of values separated by "|") can_delete_doctypes = [doctype.strip() for doctype \ in parameters['canDeleteDoctypes'].split('|') \ if doctype.strip() != ''] # The list of doctypes users are allowed to revise # (list of values separated by "|") can_revise_doctypes = [doctype.strip() for doctype \ in parameters['canReviseDoctypes'].split('|') \ if doctype.strip() != ''] # The list of doctypes users are allowed to describe # (list of values separated by "|") can_describe_doctypes = [doctype.strip() for doctype \ in parameters['canDescribeDoctypes'].split('|') \ if doctype.strip() != ''] # The list of doctypes users are allowed to comment # (list of values separated by "|") can_comment_doctypes = [doctype.strip() for doctype \ in parameters['canCommentDoctypes'].split('|') \ if doctype.strip() != ''] # The list of doctypes for which users are allowed to decide # if they want to keep old files or not when revising # (list of values separated by "|") can_keep_doctypes = [doctype.strip() for doctype \ in parameters['canKeepDoctypes'].split('|') \ if doctype.strip() != ''] # The list of doctypes users are allowed to rename # (list of values separated by "|") can_rename_doctypes = [doctype.strip() for doctype \ in parameters['canRenameDoctypes'].split('|') \ if doctype.strip() != ''] # The mapping from doctype to default filename. # '|' is used to separate doctypes groups, and '=' to # separate doctype and file in curdir where the default name is. Eg: # main=main_filename|additional=additional_filename. 
etc default_doctypes_and_curdir_files = [doctype.strip().split("=") for doctype \ in parameters['defaultFilenameDoctypes'].split('|') \ if doctype.strip() != ''] doctypes_to_default_filename = {} for doctype, curdir_file in default_doctypes_and_curdir_files: default_filename = read_file(curdir, curdir_file) if default_filename: doctypes_to_default_filename[doctype] = os.path.basename(default_filename) # The maximum number of files that can be uploaded for each doctype # Eg: # main=1|additional=3 doctypes_and_max_files = [doctype.strip().split("=") for doctype \ in parameters['maxFilesDoctypes'].split('|') \ if doctype.strip() != ''] max_files_for_doctype = {} for doctype, max_files in doctypes_and_max_files: if max_files.isdigit(): max_files_for_doctype[doctype] = int(max_files) # The list of doctypes for which users are allowed to add new formats # (list of values separated by "|") can_add_format_to_doctypes = [doctype.strip() for doctype \ in parameters['canAddFormatDoctypes'].split('|') \ if doctype.strip() != ''] # The list of access restrictions + description that users can # select when adding new files. If there are no values, no # restriction is applied . '|' is used to separate access # restrictions groups, and '=' to separate access restriction and # description. Eg: main=Main document|additional=Figure, # schema. 
etc access_restrictions_and_desc = [access.strip().split("=") for access \ in parameters['restrictions'].split('|') \ if access.strip() != ''] access_restrictions_and_desc = [[access, _(desc)] for \ (access, desc) in access_restrictions_and_desc] # The list of doctypes users are allowed to restrict # (list of values separated by "|") can_restrict_doctypes = [restriction.strip() for restriction \ in parameters['canRestrictDoctypes'].split('|') \ if restriction.strip() != ''] # If we should create additional formats when applicable (1) or # not (0) try: createRelatedFormats_p = int(parameters['createRelatedFormats']) except ValueError, e: createRelatedFormats_p = False # If users can name the files they add # Value should be 0 (Cannot rename) or 1 (Can rename) try: can_name_new_files = int(parameters['canNameNewFiles']) except ValueError, e: can_name_new_files = False # The default behaviour wrt keeping previous files or not. # 0 = do not keep, 1 = keep try: keep_default = int(parameters['keepDefault']) except ValueError, e: keep_default = False # If we display links to files (1) or not (0) try: show_links = int(parameters['showLinks']) except ValueError, e: show_links = True file_label = parameters['fileLabel'] if file_label == "": file_label = _('Choose a file') filename_label = parameters['filenameLabel'] if filename_label == "": filename_label = _('Name') description_label = parameters['descriptionLabel'] if description_label == "": description_label = _('Description') comment_label = parameters['commentLabel'] if comment_label == "": comment_label = _('Comment') restriction_label = parameters['restrictionLabel'] if restriction_label == "": restriction_label = _('Access') startDoc = parameters['startDoc'] endDoc = parameters['endDoc'] prefix = read_file(curdir, startDoc) if prefix is None: prefix = "" suffix = read_file(curdir, endDoc) if suffix is None: suffix = "" return (minsize, maxsize, doctypes_and_desc, doctypes, can_delete_doctypes, can_revise_doctypes, 
can_describe_doctypes, can_comment_doctypes, can_keep_doctypes, can_rename_doctypes, can_add_format_to_doctypes, createRelatedFormats_p, can_name_new_files, keep_default, show_links, file_label, filename_label, description_label, comment_label, prefix, suffix, access_restrictions_and_desc, can_restrict_doctypes, restriction_label, doctypes_to_default_filename, max_files_for_doctype) def read_file(curdir, filename): """ Reads a file in curdir. Returns None if does not exist, cannot be read, or if file is not really in curdir """ try: file_path = os.path.abspath(os.path.join(curdir, filename)) if not file_path.startswith(curdir): return None file_desc = file(file_path, 'r') content = file_desc.read() file_desc.close() except: content = None return content diff --git a/modules/websubmit/lib/functions/Generate_Group_File.py b/modules/websubmit/lib/functions/Generate_Group_File.py index 5caddb293..d5bd5e2b4 100644 --- a/modules/websubmit/lib/functions/Generate_Group_File.py +++ b/modules/websubmit/lib/functions/Generate_Group_File.py @@ -1,61 +1,61 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
from invenio.errorlib import register_exception from invenio.websubmit_config import InvenioWebSubmitFunctionError CFG_WEBSUBMIT_GROUP_FILE_NAME = "Group" def Generate_Group_File(parameters, curdir, form, user_info=None): """ Generates a group file (stored in 'curdir/Group') for use with publiline. @param parameters: (dictionary) - must contain: + group_name: (string) - the id of the Group for use in the complex approval refereeing workflow @param curdir: (string) - the current submission's working directory. @param form: (dictionary) - form fields. @param user_info: (dictionary) - various information about the submitting user (includes the apache req object). @return: (string) - empty string. @Exceptions raised: InvenioWebSubmitFunctionError when an unexpected error is encountered. """ try: group_file = open("%s/%s" % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME), "w") group_file.write(parameters['group_name']) group_file.flush() group_file.close() except IOError, err: ## Unable to write the Group file to curdir. err_msg = "Error: Unable to create Group file [%s/%s]. " \ "Perhaps check directory permissions. " \ % (curdir, CFG_WEBSUBMIT_GROUP_FILE_NAME) - register_exception(req=req_obj, prefix=err_msg) + register_exception(prefix=err_msg) raise InvenioWebSubmitFunctionError(err_msg) ## Return an empty string: return "" diff --git a/modules/websubmit/lib/functions/Move_Files_to_Storage.py b/modules/websubmit/lib/functions/Move_Files_to_Storage.py index 0323be33c..52e76f2f8 100644 --- a/modules/websubmit/lib/functions/Move_Files_to_Storage.py +++ b/modules/websubmit/lib/functions/Move_Files_to_Storage.py @@ -1,270 +1,270 @@ ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Function for archiving files""" __revision__ = "$Id$" from invenio.bibdocfile import \ BibRecDocs, \ decompose_file, \ - InvenioWebSubmitFileError, \ - CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + InvenioBibDocFileError, \ + CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT import os import re from invenio.websubmit_icon_creator import create_icon from invenio.websubmit_config import InvenioWebSubmitFunctionWarning from invenio.websubmit_functions.Shared_Functions import get_dictionary_from_string, \ createRelatedFormats from invenio.errorlib import register_exception from invenio.config import CFG_BINDIR from invenio.dbquery import run_sql from invenio.shellutils import run_shell_command def Move_Files_to_Storage(parameters, curdir, form, user_info=None): """ The function moves files received from the standard submission's form through file input element(s). The document are assigned a 'doctype' (or category) corresponding to the file input element (eg. a file uploaded throught 'DEMOPIC_FILE' will go to 'DEMOPIC_FILE' doctype/category). Websubmit engine builds the following file organization in the directory curdir/files: curdir/files | _____________________________________________________________________ | | | ./file input 1 element's name ./file input 2 element's name .... (for eg. 'DEMOART_MAILFILE') (for eg. 'DEMOART_APPENDIX') | | test1.pdf test2.pdf There is only one instance of all possible extension(pdf, gz...) in each part otherwise we may encounter problems when renaming files. 
+ parameters['rename']: if given, all the files in curdir/files are renamed. parameters['rename'] is of the form: elemfilename[re]* where re is an regexp to select(using re.sub) what part of the elem file has to be selected. e.g: file:TEST_FILE_RN + parameters['documenttype']: if given, other formats are created. It has 2 possible values: - if "picture" icon in gif format is created - if "fulltext" ps, gz .... formats are created + parameters['paths_and_suffixes']: directories to look into and corresponding suffix to add to every file inside. It must have the same structure as a Python dictionnary of the following form {'FrenchAbstract':'french', 'EnglishAbstract':''} The keys are the file input element name from the form <=> directories in curdir/files The values associated are the suffixes which will be added to all the files in e.g. curdir/files/FrenchAbstract + parameters['iconsize'] need only if 'icon' is selected in parameters['documenttype'] + parameters['paths_and_restrictions']: the restrictions to apply to each uploaded file. The parameter must have the same structure as a Python dictionnary of the following form: {'DEMOART_APPENDIX':'restricted'} Files not specified in this parameter are not restricted. The specified restrictions can include a variable that can be replaced at runtime, for eg: {'DEMOART_APPENDIX':'restricted to file:SuE'} + parameters['paths_and_doctypes']: if a doctype is specified, the file will be saved under the 'doctype/collection' instead of under the default doctype/collection given by the name of the upload element that was used on the websubmit interface. 
to configure the doctype in websubmit, enter the value as in a dictionnary, for eg: {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from Demo_Export_Via_Sword [DEMOSWR] Document Types """ global sysno paths_and_suffixes = parameters['paths_and_suffixes'] paths_and_restrictions = parameters['paths_and_restrictions'] rename = parameters['rename'] documenttype = parameters['documenttype'] iconsizes = parameters['iconsize'].split(',') paths_and_doctypes = parameters['paths_and_doctypes'] ## Create an instance of BibRecDocs for the current recid(sysno) bibrecdocs = BibRecDocs(sysno) paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes) paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions) paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes) ## Go through all the directories specified in the keys ## of parameters['paths_and_suffixes'] for path in paths_and_suffixes.keys(): ## Check if there is a directory for the current path if os.path.exists("%s/files/%s" % (curdir, path)): ## Retrieve the restriction to apply to files in this ## directory restriction = paths_and_restrictions.get(path, '') restriction = re.sub('(?P[^<]*)', get_pa_tag_content, restriction) ## Go through all the files in curdir/files/path for current_file in os.listdir("%s/files/%s" % (curdir, path)): ## retrieve filename and extension dummy, filename, extension = decompose_file(current_file) if extension and extension[0] != ".": extension = '.' 
+ extension if len(paths_and_suffixes[path]) != 0: extension = "_%s%s" % (paths_and_suffixes[path], extension) ## Build the new file name if rename parameter has been given if rename: filename = re.sub('(?P[^<]*)', \ get_pa_tag_content, \ parameters['rename']) if rename or len(paths_and_suffixes[path]) != 0 : ## Rename the file try: # Write the log rename_cmd fd = open("%s/rename_cmd" % curdir, "a+") fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\ "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n") ## Rename os.rename("%s/files/%s/%s" % (curdir, path, current_file), \ "%s/files/%s/%s%s" % (curdir, path, filename, extension)) fd.close() ## Save the new name in a text file in curdir so that ## the new filename can be used by templates to created the recmysl fd = open("%s/%s_RENAMED" % (curdir, path), "w") fd.write("%s%s" % (filename, extension)) fd.close() except OSError, err: msg = "Cannot rename the file.[%s]" msg %= str(err) raise InvenioWebSubmitFunctionWarning(msg) fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension) ## Check if there is any existing similar file if not bibrecdocs.check_file_exists(fullpath, extension): bibdoc = bibrecdocs.add_new_file(fullpath, doctype=paths_and_doctypes.get(path, path), never_fail=True) bibdoc.set_status(restriction) ## Fulltext if documenttype == "fulltext": additionalformats = createRelatedFormats(fullpath) if len(additionalformats) > 0: for additionalformat in additionalformats: try: bibrecdocs.add_new_format(additionalformat) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: pass ## Icon elif documenttype == "picture": has_added_default_icon_subformat_p = False for iconsize in iconsizes: try: iconpath, iconname = create_icon({ 'input-file' : fullpath, 'icon-scale' : iconsize, 'icon-name' : None, 'icon-file-format' : None, 'multipage-icon' : False, 'multipage-icon-delay' : 100, 'verbosity' : 0, }) except Exception, e: register_exception(prefix='Impossible 
to create icon for %s (record %s)' % (fullpath, sysno), alert_admin=True) continue iconpath = os.path.join(iconpath, iconname) docname = decompose_file(fullpath)[1] try: mybibdoc = bibrecdocs.get_bibdoc(docname) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: mybibdoc = None if iconpath is not None and mybibdoc is not None: try: icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '') if not has_added_default_icon_subformat_p: mybibdoc.add_icon(iconpath) has_added_default_icon_subformat_p = True else: - mybibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) + mybibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) ## Save the new icon filename in a text file in curdir so that ## it can be used by templates to created the recmysl try: if not has_added_default_icon_subformat_p: fd = open("%s/%s_ICON" % (curdir, path), "w") else: fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w") fd.write(os.path.basename(iconpath)) fd.close() except OSError, err: msg = "Cannot store icon filename.[%s]" msg %= str(err) raise InvenioWebSubmitFunctionWarning(msg) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Most probably icon already existed. pass elif mybibdoc is not None: mybibdoc.delete_icon() # Update the MARC bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know') run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s", (str(sysno),)) # Delete the HB BibFormat cache in the DB, so that the fulltext # links do not point to possible dead files run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,)) return "" def get_pa_tag_content(pa_content): """Get content for XXX. @param pa_content: MatchObject for (.*). 
return: the content of the file possibly filtered by an regular expression if pa_content=file[re]:a_file => first line of file a_file matching re if pa_content=file*p[re]:a_file => all lines of file a_file, matching re, separated by - (dash) char. """ pa_content = pa_content.groupdict()['content'] sep = '-' out = '' if pa_content.startswith('file'): filename = "" regexp = "" if "[" in pa_content: split_index_start = pa_content.find("[") split_index_stop = pa_content.rfind("]") regexp = pa_content[split_index_start+1:split_index_stop] filename = pa_content[split_index_stop+2:]## ]: else : filename = pa_content.split(":")[1] if os.path.exists(os.path.join(curdir, filename)): fp = open(os.path.join(curdir, filename), 'r') if pa_content[:5] == "file*": out = sep.join(map(lambda x: re.split(regexp, x.strip())[-1], fp.readlines())) else: out = re.split(regexp, fp.readline().strip())[-1] fp.close() return out diff --git a/modules/websubmit/lib/functions/Move_Photos_to_Storage.py b/modules/websubmit/lib/functions/Move_Photos_to_Storage.py index 2de1babbd..243b52ee5 100644 --- a/modules/websubmit/lib/functions/Move_Photos_to_Storage.py +++ b/modules/websubmit/lib/functions/Move_Photos_to_Storage.py @@ -1,553 +1,553 @@ ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""WebSubmit function - Batch photo uploader To be used with WebSubmit element 'Upload_Photos' or one of its derivatives in order to create a batch photos uploader. Requirements: ============= JQuery: - jquery.min.js JQuery UI: - jquery-ui.min.js - UI "base" theme: - jquery.ui.slider.css - jquery.ui.core.css - jquery.ui.theme.css - images Uploadify 2.0.1 (JQuery plugin): - jquery.uploadify.min.js - sfwobject.js - uploadify.css - cancel.png - uploadify.swf, uploadify.allglyphs.swf and uploadify.fla """ import os import time import re from urllib import quote from cgi import escape -from invenio.bibdocfile import BibRecDocs, InvenioWebSubmitFileError +from invenio.bibdocfile import BibRecDocs, InvenioBibDocFileError from invenio.config import CFG_BINDIR, CFG_SITE_URL from invenio.dbquery import run_sql from invenio.websubmit_icon_creator import create_icon, InvenioWebSubmitIconCreatorError -from invenio.websubmit_config import CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT +from invenio.bibdocfile_config import CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT def Move_Photos_to_Storage(parameters, curdir, form, user_info=None): """ The function moves files received from the submission's form through the PHOTO_MANAGER element and its asynchronous uploads at CFG_SITE_URL/submit/uploadfile. Parameters: @iconsize - Seperate multiple sizes with commas. The ImageMagick geometry inputs are supported. Use type 'geometry' as defined in ImageMagick. (eg. 
320 or 320x240 or 100> or 5%) Example: "180>,700>" will create two icons, one with maximum dimension 180px, one 700px @iconformat - Allowed extensions (as defined in websubmit_icon_creator.py) are: "pdf", "gif", "jpg", "jpeg", "ps", "png", "bmp" "eps", "epsi", "epsf" The PHOTO_MANAGER elements builds the following file organization in the directory curdir:: curdir/ | ______________________________________________________________________ | | | files/ PHOTO_MANAGER_ICONS icons/ | PHOTO_MANAGER_ORDER | (user id)/ PHOTO_MANAGER_DELETE (user id)/ | PHOTO_MANAGER_NEW | NewFile/ PHOTO_MANAGER_DESCRIPTION_X NewFile/ | | _______________________ _____________________ | | | | | | photo1.jpg myPhoto.gif ... photo1.jpg myPhoto.gif ... where the files are: - PHOTO_MANAGER_ORDER: ordered list of file IDs. One per line. - PHOTO_MANAGER_ICONS: mappings from file IDs to URL of the icons. One per line. Separator: / - PHOTO_MANAGER_NEW: mapping from file ID to filename on disk. Only applicable to files that have just been uploaded (i.e. not bibdocfiles). One per line. Separator: / - PHOTO_MANAGER_DELETE: list of files IDs that must be deleted. 
One per line - PHOTO_MANAGER_DESCRIPTION_X, where X is file ID: contains photos descriptions (one per file) """ global sysno icon_sizes = parameters.get('iconsize').split(',') icon_format = parameters.get('iconformat') if not icon_format: icon_format = 'gif' PHOTO_MANAGER_ICONS = read_param_file(curdir, 'PHOTO_MANAGER_ICONS', split_lines=True) photo_manager_icons_dict = dict([value.split('/', 1) \ for value in PHOTO_MANAGER_ICONS \ if '/' in value]) PHOTO_MANAGER_ORDER = read_param_file(curdir, 'PHOTO_MANAGER_ORDER', split_lines=True) photo_manager_order_list = [value for value in PHOTO_MANAGER_ORDER if value.strip()] PHOTO_MANAGER_DELETE = read_param_file(curdir, 'PHOTO_MANAGER_DELETE', split_lines=True) photo_manager_delete_list = [value for value in PHOTO_MANAGER_DELETE if value.strip()] PHOTO_MANAGER_NEW = read_param_file(curdir, 'PHOTO_MANAGER_NEW', split_lines=True) photo_manager_new_dict = dict([value.split('/', 1) \ for value in PHOTO_MANAGER_NEW \ if '/' in value]) ## Create an instance of BibRecDocs for the current recid(sysno) bibrecdocs = BibRecDocs(sysno) for photo_id in photo_manager_order_list: photo_description = read_param_file(curdir, 'PHOTO_MANAGER_DESCRIPTION_' + photo_id) # We must take different actions depending if we deal with a # file that already exists, or if it is a new file if photo_id in photo_manager_new_dict.keys(): # New file if photo_id not in photo_manager_delete_list: filename = photo_manager_new_dict[photo_id] filepath = os.path.join(curdir, 'files', str(user_info['uid']), 'NewFile', filename) icon_filename = os.path.splitext(filename)[0] + ".gif" fileiconpath = os.path.join(curdir, 'icons', str(user_info['uid']), 'NewFile', icon_filename) # Add the file if os.path.exists(filepath): _do_log(curdir, "Adding file %s" % filepath) bibdoc = bibrecdocs.add_new_file(filepath, doctype="picture", never_fail=True) has_added_default_icon_subformat_p = False for icon_size in icon_sizes: # Create icon if needed try: (icon_path, icon_name) = 
create_icon( { 'input-file' : filepath, 'icon-name' : icon_filename, 'icon-file-format' : icon_format, 'multipage-icon' : False, 'multipage-icon-delay' : 100, 'icon-scale' : icon_size, # Resize only if width > 300 'verbosity' : 0, }) fileiconpath = os.path.join(icon_path, icon_name) except InvenioWebSubmitIconCreatorError, e: _do_log(curdir, "Icon could not be created to %s: %s" % (filepath, e)) pass if os.path.exists(fileiconpath): try: if not has_added_default_icon_subformat_p: bibdoc.add_icon(fileiconpath) has_added_default_icon_subformat_p = True _do_log(curdir, "Added icon %s" % fileiconpath) else: icon_suffix = icon_size.replace('>', '').replace('<', '').replace('^', '').replace('!', '') - bibdoc.add_icon(fileiconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) + bibdoc.add_icon(fileiconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix) _do_log(curdir, "Added icon %s" % fileiconpath) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Most probably icon already existed. pass if photo_description and bibdoc: for file_format in [bibdocfile.get_format() \ for bibdocfile in bibdoc.list_latest_files()]: bibdoc.set_comment(photo_description, file_format) _do_log(curdir, "Added comment %s" % photo_description) else: # Existing file bibdocname = bibrecdocs.get_docname(int(photo_id)) if photo_id in photo_manager_delete_list: # In principle we should not get here. but just in case... 
bibrecdocs.delete_bibdoc(bibdocname) _do_log(curdir, "Deleted %s" % bibdocname) else: bibdoc = bibrecdocs.get_bibdoc(bibdocname) for file_format in [bibdocfile.get_format() \ for bibdocfile in bibdoc.list_latest_files()]: bibdoc.set_comment(photo_description, file_format) _do_log(curdir, "Added comment %s" % photo_description) # Now delete requeted files for photo_id in photo_manager_delete_list: try: bibdocname = bibrecdocs.get_docname(int(photo_id)) bibrecdocs.delete_bibdoc(bibdocname) _do_log(curdir, "Deleted %s" % bibdocname) except: # we tried to delete a photo that does not exist (maybe already deleted) pass # Update the MARC _do_log(curdir, "Asking bibdocfile to fix marc") bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know') os.system(bibdocfile_bin + " --fix-marc --recid=" + str(sysno)) # Delete the HB BibFormat cache in the DB, so that the fulltext # links do not point to possible dead files run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,)) return "" def read_param_file(curdir, param, split_lines=False): "Helper function to access files in submission dir" param_value = "" path = os.path.join(curdir, param) try: if os.path.abspath(path).startswith(curdir): fd = file(path) if split_lines: param_value = [line.strip() for line in fd.readlines()] else: param_value = fd.read() fd.close() except Exception, e: _do_log(curdir, 'Could not read %s: %s' % (param, e)) pass return param_value def _do_log(log_dir, msg): """ Log what we have done, in case something went wrong. Nice to compare with bibdocactions.log Should be removed when the development is over. """ log_file = os.path.join(log_dir, 'performed_actions.log') file_desc = open(log_file, "a+") file_desc.write("%s --> %s\n" %(time.strftime("%Y-%m-%d %H:%M:%S"), msg)) file_desc.close() def get_session_id(req, uid, user_info): """ Returns by all means the current session id of the user. 
Raises ValueError if cannot be found """ # Get the session id ## This can be later simplified once user_info object contain 'sid' key session_id = None try: try: session_id = req._session.sid() except AttributeError, e: # req was maybe not available (for eg. when this is run # through Create_Modify_Interface.py) session_id = user_info['session'] except Exception, e: raise ValueError("Cannot retrieve user session") return session_id def create_photos_manager_interface(sysno, session_id, uid, doctype, indir, curdir, access, can_delete_photos=True, can_reorder_photos=True, can_upload_photos=True, editor_width=None, editor_height=None, initial_slider_value=100, max_slider_value=200, min_slider_value=80): """ Creates and returns the HTML of the photos manager interface for submissions. @param sysno: current record id @param session_id: user session_id (as retrieved by get_session_id(...) ) @param uid: user id @param doctype: doctype of the submission @param indir: submission "indir" @param curdir: submission "curdir" @param access: submission "access" @param can_delete_photos: if users can delete photos @param can_reorder_photos: if users can reorder photos @param can_upload_photos: if users can upload photos @param editor_width: width (in pixels) of the editor @param editor_height: height (in pixels) of the editor @param initial_slider_value: initial value of the photo size slider @param max_slider_value: max value of the photo size slider @param min_slider_value: min value of the photo size slider """ out = '' PHOTO_MANAGER_ICONS = read_param_file(curdir, 'PHOTO_MANAGER_ICONS', split_lines=True) photo_manager_icons_dict = dict([value.split('/', 1) for value in PHOTO_MANAGER_ICONS if '/' in value]) PHOTO_MANAGER_ORDER = read_param_file(curdir, 'PHOTO_MANAGER_ORDER', split_lines=True) photo_manager_order_list = [value for value in PHOTO_MANAGER_ORDER if value.strip()] PHOTO_MANAGER_DELETE = read_param_file(curdir, 'PHOTO_MANAGER_DELETE', split_lines=True) 
photo_manager_delete_list = [value for value in PHOTO_MANAGER_DELETE if value.strip()] PHOTO_MANAGER_NEW = read_param_file(curdir, 'PHOTO_MANAGER_NEW', split_lines=True) photo_manager_new_dict = dict([value.split('/', 1) for value in PHOTO_MANAGER_NEW if '/' in value]) photo_manager_descriptions_dict = {} # Compile a regular expression that can match the "default" icon, # and not larger version. - CFG_WEBSUBMIT_ICON_SUBFORMAT_RE_DEFAULT = re.compile(CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + '\Z') + CFG_BIBDOCFILE_ICON_SUBFORMAT_RE_DEFAULT = re.compile(CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + '\Z') # Load the existing photos from the DB if we are displaying # this interface for the first time, and if a record exists if sysno and not PHOTO_MANAGER_ORDER: bibarchive = BibRecDocs(sysno) for doc in bibarchive.list_bibdocs(): if doc.get_icon() is not None: original_url = doc.list_latest_files()[0].get_url() doc_id = str(doc.get_id()) - icon_url = doc.get_icon(subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE_DEFAULT).get_url() # Get "default" icon + icon_url = doc.get_icon(subformat_re=CFG_BIBDOCFILE_ICON_SUBFORMAT_RE_DEFAULT).get_url() # Get "default" icon description = "" for bibdoc_file in doc.list_latest_files(): #format = bibdoc_file.get_format().lstrip('.').upper() #url = bibdoc_file.get_url() #photo_files.append((format, url)) if not description and bibdoc_file.get_comment(): description = escape(bibdoc_file.get_comment()) name = doc.get_docname() photo_manager_descriptions_dict[doc_id] = description photo_manager_icons_dict[doc_id] = icon_url photo_manager_order_list.append(doc_id) # FIXME: respect order # Prepare the list of photos to display. 
photos_img = [] for doc_id in photo_manager_order_list: if not photo_manager_icons_dict.has_key(doc_id): continue icon_url = photo_manager_icons_dict[doc_id] if PHOTO_MANAGER_ORDER: # Get description from disk only if some changes have been done description = escape(read_param_file(curdir, 'PHOTO_MANAGER_DESCRIPTION_' + doc_id)) else: description = escape(photo_manager_descriptions_dict[doc_id]) photos_img.append('''
  • ''' % \ {'initial_slider_value': initial_slider_value, 'doc_id': doc_id, 'icon_url': icon_url, 'description': description}) out += '''
      %(photos_img)s
    ''' % {'CFG_SITE_URL': CFG_SITE_URL, #'curdir': cgi.escape(quote(curdir, safe="")),#quote(curdir, safe=""), 'uid': uid, 'access': quote(access, safe=""), 'doctype': quote(doctype, safe=""), 'indir': quote(indir, safe=""), 'session_id': quote(session_id, safe=""), 'PHOTO_MANAGER_ICONS': '\n'.join([key + '/' + value for key, value in photo_manager_icons_dict.iteritems()]), 'PHOTO_MANAGER_ORDER': '\n'.join(photo_manager_order_list), 'PHOTO_MANAGER_DELETE': '\n'.join(photo_manager_delete_list), 'PHOTO_MANAGER_NEW': '\n'.join([key + '/' + value for key, value in photo_manager_new_dict.iteritems()]), 'initial_slider_value': initial_slider_value, 'max_slider_value': max_slider_value, 'min_slider_value': min_slider_value, 'photos_img': '\n'.join(photos_img), 'hide_photo_viewer': (len(photos_img) == 0 and len(photo_manager_new_dict.keys()) == 0) and 'display:none;' or '', 'delete_hover_class': can_delete_photos and "#sortable li div.imgBlock:hover .hidden {display:inline;}" or '', 'can_reorder_photos': can_reorder_photos and 'true' or 'false', 'can_upload_photos': can_upload_photos and 'true' or 'false', 'upload_display': not can_upload_photos and 'display: none' or '', 'editor_width_style': editor_width and 'width:%spx;' % editor_width or '', 'editor_height_style': editor_height and 'height:%spx;' % editor_height or ''} return out diff --git a/modules/websubmit/lib/functions/Move_Revised_Files_to_Storage.py b/modules/websubmit/lib/functions/Move_Revised_Files_to_Storage.py index 418eacf9e..d082ee894 100644 --- a/modules/websubmit/lib/functions/Move_Revised_Files_to_Storage.py +++ b/modules/websubmit/lib/functions/Move_Revised_Files_to_Storage.py @@ -1,418 +1,418 @@ ## $Id: Move_Revised_Files_to_Storage.py,v 1.20 2009/03/26 13:48:42 jerome Exp $ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSubmit function - Archives uploaded files TODO: - Add parameter 'elementNameToFilename' so that files to revise can be matched by name instead of doctype. - Icons are created only for uploaded files, but not for related format created on the fly. """ __revision__ = "$Id$" import time import os from invenio.bibdocfile import \ - InvenioWebSubmitFileError, \ + InvenioBibDocFileError, \ BibRecDocs from invenio.errorlib import register_exception from invenio.websubmit_icon_creator import \ create_icon, InvenioWebSubmitIconCreatorError from invenio.config import CFG_BINDIR from invenio.dbquery import run_sql from invenio.websubmit_functions.Shared_Functions import \ createRelatedFormats -from invenio.websubmit_managedocfiles import get_description_and_comment +from invenio.bibdocfile_managedocfiles import get_description_and_comment def Move_Revised_Files_to_Storage(parameters, curdir, form, user_info=None): """ The function revises the files of a record with the newly uploaded files. This function can work only if you can define a mapping from the WebSubmit element name that uploads the file, to the doctype of the file. In most cases, the doctype is equivalent to the element name, or just map to 'Main' doctype. 
That is typically the case if you use the Move_Files_to_Storage.py function to upload the files at submission step. For eg. with the DEMOBOO submission of the Atlantis Demo site, a file is uploaded thanks to the DEMOBOO_FILE element/File input, which is mapped to doctype DEMOBOO_FILE. The function ignores files for which multiple files exist for a single doctype in the record, or when several files are uploaded with the same element name. If the record to revise does not have a corresponding file, the file is inserted This function is similar to Move_Uploaded_Files_to_Storage.py, excepted that Move_Uploaded_Files_to_Storage relies on files uploaded from the web interface created by Create_Upload_Files_Interface.py, while this function relies on the files uploaded by a regular WebSubmit page that you have built from WebSubmit admin: Regular WebSubmit interface --(upload file)--> Move_Revised_Files_to_Storage.py Create_Upload_Files_Interface.py --(upload file)--> Move_Uploaded_Files_to_Storage.py The main advantages of this function over the functions Create_Upload_Files_Interface.py/Move_Uploaded_Files_to_Storage is that it lets you customize the display of your submission in the way you want, which could be simpler for your users if you usually only upload a few and fixed number of files per record. The disadvantages are that this function is not capable of : deleting files, adding an alternative format to a file, add a variable number of files, does not allow to set permissions at the level of file, does not support user comments, renaming, etc. @param parameters:(dictionary) - must contain: + elementNameToDoctype: maps an element/field name to a doctype. Eg. 
the file uploaded from the DEMOBOO_FILE element (input file tag) should revise the file with document type (doctype) "Main": DEMOBOO_FILE=Main|DEMOBOO_FILE_2=ADDITIONAL ('=' separates element name and doctype '|' separates each doctype/element name group) In most cases, the element name == doctype: DEMOBOO_FILE=DEMOBOO_FILE|DEMOBOO_FILE_2=DEMOBOO_FILE_2 + createIconDoctypes: the list of doctypes for which an icon should be created when revising the file. Eg: Figure|Graph ('|' separated values) Use '*' for all doctypes + iconsize: size of the icon to create (when applicable) + keepPreviousVersionDoctypes: the list of doctypes for which the function should keep previous versions visible when revising a file. Eg: Main|Additional ('|' separated values) Default is all + createRelatedFormats: if uploaded files get converted to whatever format we can (1) or not (0) """ # pylint: disable=E0602 # sysno is defined in the WebSubmit functions sandbox. global sysno bibrecdocs = BibRecDocs(int(sysno)) # Wash function parameters (element_name_and_doctype, create_icon_doctypes, iconsize, keep_previous_version_doctypes, createRelatedFormats_p) = \ wash_function_parameters(parameters, curdir) for element_name, doctype in element_name_and_doctype: _do_log(curdir, "Processing " + element_name) # Check if there is a corresponding file file_path = os.path.join(curdir, 'files', element_name, read_file(curdir, element_name)) if file_path and os.path.exists(file_path): # Now identify which file to revise files_in_record = bibrecdocs.list_bibdocs(doctype) if len(files_in_record) == 1: # Ok, we can revise bibdoc_name = files_in_record[0].get_docname() revise(bibrecdocs, curdir, sysno, file_path, bibdoc_name, doctype, iconsize, create_icon_doctypes, keep_previous_version_doctypes, createRelatedFormats_p) elif len(files_in_record) == 0: # We must add the file add(bibrecdocs, curdir, sysno, file_path, doctype, iconsize, create_icon_doctypes, createRelatedFormats_p) else: _do_log(curdir, " %s 
ignored, because multiple files found for same doctype %s in record %s: %s" %\ (element_name, doctype, sysno, ', '.join(files_in_record))) else: _do_log(curdir, " No corresponding file found (%s)" % file_path) # Update the MARC bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know') os.system(bibdocfile_bin + " --fix-marc --recid=" + sysno) # Delete the HB BibFormat cache in the DB, so that the fulltext # links do not point to possible dead files run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,)) # pylint: enable=E0602 def add(bibrecdocs, curdir, sysno, file_path, doctype, iconsize, create_icon_doctypes, createRelatedFormats_p): """ Adds the file using bibdocfile """ try: # Add file bibdoc = bibrecdocs.add_new_file(file_path, doctype, never_fail=True) _do_log(curdir, ' Added ' + bibdoc.get_docname() + ': ' + \ file_path) # Add icon iconpath = '' if doctype in create_icon_doctypes or \ '*' in create_icon_doctypes: iconpath = _create_icon(file_path, iconsize) if iconpath is not None: bibdoc.add_icon(iconpath) _do_log(curdir, ' Added icon to ' + \ bibdoc.get_docname() + ': ' + iconpath) # Automatically create additional formats when # possible. additional_formats = [] if createRelatedFormats_p: additional_formats = createRelatedFormats(file_path, overwrite=False) for additional_format in additional_formats: bibdoc.add_new_format(additional_format, bibdoc.get_docname()) # Log _do_log(curdir, ' Added format ' + additional_format + \ ' to ' + bibdoc.get_docname() + ': ' + iconpath) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Format already existed. How come? We should # have checked this in Create_Upload_Files_Interface.py register_exception(prefix='Move_Revised_Files_to_Storage ' \ 'tried to add already existing file %s ' \ 'to record %i. 
%s' % \ (file_path, sysno, curdir), alert_admin=True) def revise(bibrecdocs, curdir, sysno, file_path, bibdoc_name, doctype, iconsize, create_icon_doctypes, keep_previous_version_doctypes, createRelatedFormats_p): """ Revises the given bibdoc with a new file """ try: # Retrieve the current description and comment, or they # will be lost when revising latest_files = bibrecdocs.list_bibdocs(doctype)[0].list_latest_files() prev_desc, prev_comment = get_description_and_comment(latest_files) if doctype in keep_previous_version_doctypes: # Standard procedure, keep previous version bibdoc = bibrecdocs.add_new_version(file_path, bibdoc_name, prev_desc, prev_comment) _do_log(curdir, ' Revised ' + bibdoc.get_docname() + \ ' with : ' + file_path) else: # Soft-delete previous versions, and add new file # (we need to get the doctype before deleting) if bibrecdocs.has_docname_p(bibdoc_name): # Delete only if bibdoc originally # existed bibrecdocs.delete_bibdoc(bibdoc_name) _do_log(curdir, ' Deleted ' + bibdoc_name) try: bibdoc = bibrecdocs.add_new_file(file_path, doctype, bibdoc_name, never_fail=True, description=prev_desc, comment=prev_comment) _do_log(curdir, ' Added ' + bibdoc.get_docname() + ': ' + \ file_path) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: _do_log(curdir, str(e)) register_exception(prefix='Move_Uploaded_Files_to_Storage ' \ 'tried to revise a file %s ' \ 'named %s in record %i. %s' % \ (file_path, bibdoc_name, sysno, curdir), alert_admin=True) # Add icon iconpath = '' if doctype in create_icon_doctypes or \ '*' in create_icon_doctypes: iconpath = _create_icon(file_path, iconsize) if iconpath is not None: bibdoc.add_icon(iconpath) _do_log(curdir, 'Added icon to ' + \ bibdoc.get_docname() + ': ' + iconpath) # Automatically create additional formats when # possible. 
additional_formats = [] if createRelatedFormats_p: additional_formats = createRelatedFormats(file_path, overwrite=False) for additional_format in additional_formats: bibdoc.add_new_format(additional_format, bibdoc_name, prev_desc, prev_comment) # Log _do_log(curdir, ' Addeded format ' + additional_format + \ ' to ' + bibdoc.get_docname() + ': ' + iconpath) - except InvenioWebSubmitFileError, e: + except InvenioBibDocFileError, e: # Format already existed. How come? We should # have checked this in Create_Upload_Files_Interface.py register_exception(prefix='Move_Revised_Files_to_Storage ' \ 'tried to revise a file %s ' \ 'named %s in record %i. %s' % \ (file_path, bibdoc_name, sysno, curdir), alert_admin=True) def wash_function_parameters(parameters, curdir): """ Returns the functions (admin-defined) parameters washed and initialized properly, as a tuple: Parameters: check Move_Revised_Files_to_Storage(..) docstring Returns: tuple (element_name_and_doctype, create_icon_doctypes, iconsize, keep_previous_version_doctypes, createRelatedFormats_p) """ # The mapping element name -> doctype. # '|' is used to separate mapping groups, and '=' to separate # element name and doctype. # Eg: DEMOBOO_FILE=Main|DEMOBOO_FILEADDITIONAL=Additional File element_name_and_doctype = [mapping.strip().split("=") for mapping \ in parameters['elementNameToDoctype'].split('|') \ if mapping.strip() != ''] # The list of doctypes for which we want to create an icon # (list of values separated by "|") create_icon_doctypes = [doctype.strip() for doctype \ in parameters['createIconDoctypes'].split('|') \ if doctype.strip() != ''] # If we should create additional formats when applicable (1) or # not (0) try: createRelatedFormats_p = int(parameters['createRelatedFormats']) except ValueError, e: createRelatedFormats_p = False # Icons size iconsize = parameters.get('iconsize') # The list of doctypes for which we want to keep previous versions # of files visible. 
# (list of values separated by "|") keep_previous_version_doctypes = [doctype.strip() for doctype \ in parameters['keepPreviousVersionDoctypes'].split('|') \ if doctype.strip() != ''] if not keep_previous_version_doctypes: # Nothing specified: keep all by default keep_previous_version_doctypes = [doctype for (elem, doctype) \ in element_name_and_doctype] return (element_name_and_doctype, create_icon_doctypes, iconsize, keep_previous_version_doctypes, createRelatedFormats_p) def _do_log(log_dir, msg): """ Log what we have done, in case something went wrong. Nice to compare with bibdocactions.log Should be removed when the development is over. """ log_file = os.path.join(log_dir, 'performed_actions.log') file_desc = open(log_file, "a+") file_desc.write("%s --> %s\n" %(time.strftime("%Y-%m-%d %H:%M:%S"), msg)) file_desc.close() def _create_icon(file_path, icon_size, format='gif', verbosity=9): """ Creates icon of given file. Returns path to the icon. If creation fails, return None, and register exception (send email to admin). Parameters: - file_path : *str* full path to icon - icon_size : *int* the scaling information to be used for the creation of the new icon. - verbosity : *int* the verbosity level under which the program is to run; """ icon_path = None try: filename = os.path.splitext(os.path.basename(file_path))[0] (icon_dir, icon_name) = create_icon( {'input-file':file_path, 'icon-name': "icon-%s" % filename, 'multipage-icon': False, 'multipage-icon-delay': 0, 'icon-scale': icon_size, 'icon-file-format': format, 'verbosity': verbosity}) icon_path = icon_dir + os.sep + icon_name except InvenioWebSubmitIconCreatorError, e: register_exception(prefix='Icon for file %s could not be created: %s' % \ (file_path, str(e)), alert_admin=False) return icon_path def read_file(curdir, filename): """ Reads a file in curdir. 
Returns None if does not exist, cannot be read, or if file is not really in curdir """ try: file_path = os.path.abspath(os.path.join(curdir, filename)) if not file_path.startswith(curdir): return None file_desc = file(file_path, 'r') content = file_desc.read() file_desc.close() except: content = None return content diff --git a/modules/websubmit/lib/functions/Move_Uploaded_Files_to_Storage.py b/modules/websubmit/lib/functions/Move_Uploaded_Files_to_Storage.py index a91f9b11e..e78bc2e63 100644 --- a/modules/websubmit/lib/functions/Move_Uploaded_Files_to_Storage.py +++ b/modules/websubmit/lib/functions/Move_Uploaded_Files_to_Storage.py @@ -1,79 +1,79 @@ ## $Id: Move_Revised_Files_to_Storage.py,v 1.20 2009/03/26 13:48:42 jerome Exp $ ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSubmit function - Archives files uploaded with the upload file interface. To be used on par with Create_Upload_Files_Interface.py function: - Create_Upload_Files_Interface records the actions performed by user. - Move_Uploaded_Files_to_Storage executes the recorded actions. 
NOTE: ===== - Due to the way WebSubmit works, this function can only work when positionned at step 2 in WebSubmit admin, and Create_Upload_Files_Interface is at step 1 """ __revision__ = "$Id$" -from invenio import websubmit_managedocfiles +from invenio import bibdocfile_managedocfiles def Move_Uploaded_Files_to_Storage(parameters, curdir, form, user_info=None): """ The function moves files uploaded using the Create_Upload_Files_Interface.py function. It reads the action previously performed by the user on the files and calls the corresponding functions of bibdocfile. @param parameters:(dictionary) - must contain: + iconsizes: sizes of the icons to create (when applicable), separated by commas. Eg: 180>,700> + createIconDoctypes: the list of doctypes for which an icon should be created. Eg: Figure|Graph ('|' separated values) Use '*' for all doctypes + forceFileRevision: when revising attributes of a file (comment, description) without uploading a new file, force a revision of the current version (so that old comment, description, etc. is kept) (1) or not (0). 
""" global sysno recid = int(sysno) iconsize = parameters.get('iconsize').split(',') create_icon_doctypes = parameters.get('createIconDoctypes').split('|') force_file_revision = (parameters.get('forceFileRevision') == '1') try: - websubmit_managedocfiles._read_file_revision_interface_configuration_from_disk(curdir) + bibdocfile_managedocfiles._read_file_revision_interface_configuration_from_disk(curdir) except IOError: return - websubmit_managedocfiles.move_uploaded_files_to_storage(curdir, + bibdocfile_managedocfiles.move_uploaded_files_to_storage(curdir, recid, iconsize, create_icon_doctypes, force_file_revision) diff --git a/modules/websubmit/lib/functions/Stamp_Replace_Single_File_Approval.py b/modules/websubmit/lib/functions/Stamp_Replace_Single_File_Approval.py index 76fd1b165..5833ac6f5 100644 --- a/modules/websubmit/lib/functions/Stamp_Replace_Single_File_Approval.py +++ b/modules/websubmit/lib/functions/Stamp_Replace_Single_File_Approval.py @@ -1,511 +1,511 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Stamp_Replace_Single_File_Approval: A function to allow a single file that is already attached to a record to be stamped at approval time. 
""" __revision__ = "$Id$" -from invenio.bibdocfile import BibRecDocs, InvenioWebSubmitFileError +from invenio.bibdocfile import BibRecDocs, InvenioBibDocFileError from invenio.errorlib import register_exception from invenio import websubmit_file_stamper from invenio.websubmit_config import InvenioWebSubmitFunctionWarning, \ InvenioWebSubmitFunctionError, InvenioWebSubmitFileStamperError import os.path import re import cgi import time def Stamp_Replace_Single_File_Approval(parameters, \ curdir, \ form, \ user_info=None): """ This function is intended to be called when a document has been approved and needs to be stamped. The function should be used when there is ONLY ONE file to be stamped after approval (for example, the "main file"). The name of the file to be stamped should be known and should be stored in a file in the submission's working directory (without the extension). Generally, this will work our fine as the main file is named after the report number of the document, this will be stored in the report number file. @param parameters: (dictionary) - must contain: + latex_template: (string) - the name of the LaTeX template that should be used for the creation of the stamp. + latex_template_vars: (string) - a string-ified dictionary of variables to be replaced in the LaTeX template and the values (or names of files in curdir containing the values) with which to replace them. Use prefix 'FILE:' to specify that the stamped value must be read from a file in submission directory instead of being a fixed value to stamp. E.G.: { 'TITLE' : 'FILE:DEMOTHESIS_TITLE', 'DATE' : 'FILE:DEMOTHESIS_DATE' } + file_to_be_stamped: (string) - this is the name of a file in the submission's working directory that contains the name of the bibdocfile that is to be stamped. + new_file_name: (string) - this is the name of a file in the submission's working directory that contains the name that is to be given to the file after it has been stamped. 
If empty, or if that file doesn't exist, the file will not be renamed after stamping. + switch_file: (string) - when this value is set, specifies the name of a file that will swith on/off the stamping. The stamp will be applied if the file exists in the submission directory and is not empty. If the file cannot be found or is empty, the stamp is not applied. Useful for eg. if you want to let your users control the stamping with a checkbox on your submission page. Leave this parameter empty to always stamp by default. + stamp: (string) - the type of stamp to be applied to the file. should be one of: + first (only the first page is stamped); + all (all pages are stamped); + coverpage (a separate cover-page is added to the file as a first page); + layer: (string) - the position of the stamp. Should be one of: + background (invisible if original file has a white -not transparent- background layer) + foreground (on top of the stamped file. If the stamp does not have a transparent background, will hide all of the document layers) The default value is 'background'. """ ############ ## Definition of important variables: ############ ## The file stamper needs to be called with a dictionary of options of ## the following format: ## { 'latex-template' : "", ## TEMPLATE_NAME ## 'latex-template-var' : {}, ## TEMPLATE VARIABLES ## 'input-file' : "", ## INPUT FILE ## 'output-file' : "", ## OUTPUT FILE ## 'stamp' : "", ## STAMP TYPE ## 'layer' : "", ## LAYER TO STAMP ## 'verbosity' : 0, ## VERBOSITY (we don't care about it) ## } file_stamper_options = { 'latex-template' : "", 'latex-template-var' : { }, 'input-file' : "", 'output-file' : "", 'stamp' : "", 'layer' : "", 'verbosity' : 0, } ## Check if stamping is enabled switch_file = parameters.get('switch_file', '') if switch_file: # Good, a "switch file" was specified. Check if it exists, and # it its value is not empty. if not _read_in_file(os.path.join(curdir, switch_file)): # File does not exist, or is emtpy. 
Silently abort # stamping. return "" ## Submission access number: access = _read_in_file("%s/access" % curdir) ## record ID for the current submission. It is found in the special file ## "SN" (sysno) in curdir: recid = _read_in_file("%s/SN" % curdir) try: recid = int(recid) except ValueError: ## No record ID. Cannot continue. err_msg = "Error in Stamp_Replace_Single_File_Approval: " \ "Cannot recover record ID from the submission's working " \ "directory. Stamping cannot be carried out. The " \ "submission ID is [%s]." % cgi.escape(access) register_exception(prefix=err_msg) raise InvenioWebSubmitFunctionError(err_msg) ############ ## Resolution of function parameters: ############ ## The name of the LaTeX template to be used for stamp creation: latex_template = "%s" % ((type(parameters['latex_template']) is str \ and parameters['latex_template']) or "") ## A string containing the variables/values that should be substituted ## in the final (working) LaTeX template: latex_template_vars_string = "%s" % \ ((type(parameters['latex_template_vars']) is str \ and parameters['latex_template_vars']) or "") ## The type of stamp to be applied to the file(s): stamp = "%s" % ((type(parameters['stamp']) is str and \ parameters['stamp'].lower()) or "") ## The layer to use for stamping: try: layer = parameters['layer'] except KeyError: layer = "background" if not layer in ('background', 'foreground'): layer = "background" ## Get the name of the file to be stamped from the file indicated in ## the file_to_be_stamped parameter: try: file_to_stamp_file = parameters['file_to_be_stamped'] except KeyError: file_to_stamp_file = "" else: if file_to_stamp_file is None: file_to_stamp_file = "" ## Get the "basename" for the file to be stamped (it's mandatory that it ## be in curdir): file_to_stamp_file = os.path.basename(file_to_stamp_file).strip() name_file_to_stamp = _read_in_file("%s/%s" % (curdir, file_to_stamp_file)) name_file_to_stamp.replace("\n", "").replace("\r", "") ## ## Get the 
name to be given to the file after it has been stamped (if there ## is one.) Once more, it will be found in a file in curdir: try: new_file_name_file = parameters['new_file_name'] except KeyError: new_file_name_file = "" else: if new_file_name_file is None: new_file_name_file = "" ## Get the "basename" for the file containing the new file name. (It's ## mandatory that it be in curdir): new_file_name_file = os.path.basename(new_file_name_file).strip() new_file_name = _read_in_file("%s/%s" % (curdir, new_file_name_file)) ############ ## Begin: ############ ## ## If no name for the file to stamp, warning. if name_file_to_stamp == "": wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \ "It was not possible to recover a valid name for the " \ "file to be stamped. Stamping could not, therefore, be " \ "carried out. The submission ID is [%s]." \ % access raise InvenioWebSubmitFunctionWarning(wrn_msg) ## ## The file to be stamped is a bibdoc. We will only stamp it (a) if it ## exists; and (b) if it is a PDF file. So, get the path (in the bibdocs ## tree) to the file to be stamped: ## ## First get the object representing the bibdocs belonging to this record: bibrecdocs = BibRecDocs(recid) try: bibdoc_file_to_stamp = bibrecdocs.get_bibdoc("%s" % name_file_to_stamp) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: ## Couldn't get a bibdoc object for this filename. Probably the file ## that we wanted to stamp wasn't attached to this record. wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \ "It was not possible to recover a bibdoc object for the " \ "filename [%s] when trying to stamp the main file. " \ "Stamping could not be carried out. The submission ID is " \ "[%s] and the record ID is [%s]." 
\ % (name_file_to_stamp, access, recid) register_exception(prefix=wrn_msg) raise InvenioWebSubmitFunctionWarning(wrn_msg) ## Get the BibDocFile object for the PDF version of the bibdoc to be ## stamped: try: bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("pdf") - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: ## This bibdoc doesn't have a physical file with the extension ".pdf" ## (take note of the lower-case extension - the bibdocfile library ## is case-sensitive with respect to filenames. Log that there was ## no "pdf" and check for a file with extension "PDF": wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \ "It wasn't possible to recover a PDF BibDocFile object " \ "for the file with the name [%s], using the extension " \ "[pdf] - note the lower case - the bibdocfile library " \ "relies upon the case of an extension. The submission ID " \ "is [%s] and the record ID is [%s]. Going to try " \ "looking for a file with a [PDF] extension before giving " \ "up . . . " \ % (name_file_to_stamp, access, recid) register_exception(prefix=wrn_msg) try: bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("PDF") - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \ "It wasn't possible to recover a PDF " \ "BibDocFile object for the file with the name [%s], " \ "using the extension [PDF] - note the upper case. " \ "Had previously tried searching for [pdf] - now " \ "giving up. Stamping could not be carried out. " \ "The submission ID is [%s] and the record ID is [%s]." 
\ % (name_file_to_stamp, access, recid) register_exception(prefix=wrn_msg) raise InvenioWebSubmitFunctionWarning(wrn_msg) ############ ## Go ahead and prepare the details for the LaTeX stamp template and its ## variables: ############ ## Strip the LaTeX filename into the basename (All templates should be ## in the template repository): latex_template = os.path.basename(latex_template) ## Convert the string of latex template variables into a dictionary ## of search-term/replacement-term pairs: latex_template_vars = get_dictionary_from_string(latex_template_vars_string) ## For each of the latex variables, check in `CURDIR' for a file with that ## name. If found, use it's contents as the template-variable's value. ## If not, just use the raw value string already held by the template ## variable: latex_template_varnames = latex_template_vars.keys() for varname in latex_template_varnames: ## Get this variable's value: varvalue = latex_template_vars[varname].strip() if not ((varvalue.find("date(") == 0 and varvalue[-1] == ")") or \ (varvalue.find("include(") == 0 and varvalue[-1] == ")")) \ and varvalue != "": ## We don't want to interfere with date() or include() directives, ## so we only do this if the variable value didn't contain them: ## ## Is this variable value the name of a file in the current ## submission's working directory, from which a literal value for ## use in the template should be extracted? If yes, it will ## begin with "FILE:". If no, we leave the value exactly as it is. if varvalue.upper().find("FILE:") == 0: ## The value to be used is to be taken from a file. Clean the ## file name and if it's OK, extract that value from the file. ## seekvalue_fname = varvalue[5:].strip() seekvalue_fname = os.path.basename(seekvalue_fname).strip() if seekvalue_fname != "": ## Attempt to extract the value from the file: if os.access("%s/%s" % (curdir, seekvalue_fname), \ os.R_OK|os.F_OK): ## The file exists. 
Extract its value: try: repl_file_val = \ open("%s/%s" \ % (curdir, seekvalue_fname), "r").readlines() except IOError: ## The file was unreadable. err_msg = "Error in Stamp_Replace_Single_File_" \ "Approval: The function attempted to " \ "read a LaTex template variable " \ "value from the following file in the " \ "current submission's working " \ "directory: [%s]. However, an " \ "unexpected error was encountered " \ "when doing so. Please inform the " \ "administrator." \ % seekvalue_fname register_exception(req=user_info['req']) raise InvenioWebSubmitFunctionError(err_msg) else: final_varval = "" for line in repl_file_val: final_varval += line final_varval = final_varval.rstrip() ## Replace the variable value with that which has ## been read from the file: latex_template_vars[varname] = final_varval else: ## The file didn't actually exist in the current ## submission's working directory. Use an empty ## value: latex_template_vars[varname] = "" else: ## The filename was not valid. err_msg = "Error in Stamp_Replace_Single_File_Approval: " \ "The function was configured to read a LaTeX " \ "template variable from a file with the " \ "following instruction: [%s --> %s]. The " \ "filename, however, was not considered valid. " \ "Please report this to the administrator." 
\ % (varname, varvalue) raise InvenioWebSubmitFunctionError(err_msg) ## Put the 'fixed' values into the file_stamper_options dictionary: file_stamper_options['latex-template'] = latex_template file_stamper_options['latex-template-var'] = latex_template_vars file_stamper_options['stamp'] = stamp file_stamper_options['layer'] = layer ## Put the input file and output file into the file_stamper_options ## dictionary: file_stamper_options['input-file'] = bibdocfile_file_to_stamp.fullpath file_stamper_options['output-file'] = bibdocfile_file_to_stamp.fullname ## ## Before attempting to stamp the file, log the dictionary of arguments ## that will be passed to websubmit_file_stamper: try: fh_log = open("%s/websubmit_file_stamper-calls-options.log" \ % curdir, "a+") fh_log.write("%s\n" % file_stamper_options) fh_log.flush() fh_log.close() except IOError: ## Unable to log the file stamper options. exception_prefix = "Unable to write websubmit_file_stamper " \ "options to log file " \ "%s/websubmit_file_stamper-calls-options.log" \ % curdir register_exception(prefix=exception_prefix) try: ## Try to stamp the file: (stamped_file_path_only, stamped_file_name) = \ websubmit_file_stamper.stamp_file(file_stamper_options) except InvenioWebSubmitFileStamperError: ## It wasn't possible to stamp this file. ## Register the exception along with an informational message: wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \ "There was a problem stamping the file with the name [%s] " \ "and the fullpath [%s]. The file has not been stamped. " \ "The submission ID is [%s] and the record ID is [%s]." \ % (name_file_to_stamp, \ file_stamper_options['input-file'], \ access, \ recid) register_exception(prefix=wrn_msg) raise InvenioWebSubmitFunctionWarning(wrn_msg) else: ## Stamping was successful. 
The BibDocFile must now be revised with ## the latest (stamped) version of the file: file_comment = "Stamped by WebSubmit: %s" \ % time.strftime("%d/%m/%Y", time.localtime()) try: dummy = \ bibrecdocs.add_new_version("%s/%s" \ % (stamped_file_path_only, \ stamped_file_name), \ name_file_to_stamp, \ comment=file_comment, \ flags=('STAMPED', )) - except InvenioWebSubmitFileError: + except InvenioBibDocFileError: ## Unable to revise the file with the newly stamped version. wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \ "After having stamped the file with the name [%s] " \ "and the fullpath [%s], it wasn't possible to revise " \ "that file with the newly stamped version. Stamping " \ "was unsuccessful. The submission ID is [%s] and the " \ "record ID is [%s]." \ % (name_file_to_stamp, \ file_stamper_options['input-file'], \ access, \ recid) register_exception(prefix=wrn_msg) raise InvenioWebSubmitFunctionWarning(wrn_msg) else: ## File revised. If the file should be renamed after stamping, ## do so. if new_file_name != "": try: bibdoc_file_to_stamp.change_name(new_file_name) - except (IOError, InvenioWebSubmitFileError): + except (IOError, InvenioBibDocFileError): ## Unable to change the name wrn_msg = "Warning in Stamp_Replace_Single_File_Approval" \ ": After having stamped and revised the file " \ "with the name [%s] and the fullpath [%s], it " \ "wasn't possible to rename it to [%s]. The " \ "submission ID is [%s] and the record ID is " \ "[%s]." \ % (name_file_to_stamp, \ file_stamper_options['input-file'], \ new_file_name, \ access, \ recid) ## Finished. return "" def get_dictionary_from_string(dict_string): """Given a string version of a "dictionary", split the string into a python dictionary. 
For example, given the following string: {'TITLE' : 'EX_TITLE', 'AUTHOR' : 'EX_AUTHOR', 'REPORTNUMBER' : 'EX_RN'} A dictionary in the following format will be returned: { 'TITLE' : 'EX_TITLE', 'AUTHOR' : 'EX_AUTHOR', 'REPORTNUMBER' : 'EX_RN', } @param dict_string: (string) - the string version of the dictionary. @return: (dictionary) - the dictionary build from the string. """ ## First, strip off the leading and trailing spaces and braces: dict_string = dict_string.strip(" {}") ## Next, split the string on commas (,) that have not been escaped ## So, the following string: """'hello' : 'world', 'click' : 'here'""" ## will be split into the following list: ## ["'hello' : 'world'", " 'click' : 'here'"] ## ## However, the string """'hello\, world' : '!', 'click' : 'here'""" ## will be split into: ["'hello\, world' : '!'", " 'click' : 'here'"] ## I.e. the comma that was escaped in the string has been kept. ## ## So basically, split on unescaped parameters at first: key_vals = re.split(r'(?This submission has been completed. 
Please go to the""" \ """ """ \ """main menu to start a new submission.""" \ % { 'doctype' : quote_plus(doctype), 'ln' : ln } return warningMsg(wrnmsg, req) ## retrieve the action and doctype data: ## Concatenate action ID and doctype ID to make the submission ID: subname = "%s%s" % (act, doctype) ## Get the submission storage directory from the DB: submission_dir = get_storage_directory_of_action(act) if submission_dir: indir = submission_dir else: ## Unable to determine the submission-directory: return warningMsg(_("Unable to find the submission directory for the action: %s") % escape(str(act)), req, c, ln) ## get the document type's long-name: doctype_lname = get_longname_of_doctype(doctype) if doctype_lname is not None: ## Got the doctype long-name: replace spaces with HTML chars: docname = doctype_lname.replace(" ", " ") else: ## Unknown document type: return warningMsg(_("Unknown document type"), req, c, ln) ## get the action's long-name: actname = get_longname_of_action(act) if actname is None: ## Unknown action: return warningMsg(_("Unknown action"), req, c, ln) ## Get the number of pages for this submission: num_submission_pages = get_num_pages_of_submission(subname) if num_submission_pages is not None: nbpages = num_submission_pages else: ## Unable to determine the number of pages for this submission: return warningMsg(_("Unable to determine the number of submission pages."), req, c, ln) ## If unknown, get the current page of submission: if startPg != "" and curpage in ("", 0): curpage = startPg ## retrieve the name of the file in which the reference of ## the submitted document will be stored rn_filename = get_parameter_value_for_doctype(doctype, "edsrn") if rn_filename is not None: edsrn = rn_filename else: ## Unknown value for edsrn - set it to an empty string: edsrn = "" ## This defines the path to the directory containing the action data curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, indir, doctype, access) try: assert(curdir == 
os.path.abspath(curdir)) except AssertionError: register_exception(req=req, prefix='indir="%s", doctype="%s", access="%s"' % (indir, doctype, access)) return warningMsg(_("Invalid parameters"), req, c, ln) ## if this submission comes from another one (fromdir is then set) ## We retrieve the previous submission directory and put it in the proper one if fromdir != "": olddir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, fromdir, doctype, access) try: assert(olddir == os.path.abspath(olddir)) except AssertionError: register_exception(req=req, prefix='fromdir="%s", doctype="%s", access="%s"' % (fromdir, doctype, access)) return warningMsg(_("Invalid parameters"), req, c, ln) if os.path.exists(olddir): os.rename(olddir, curdir) ## If the submission directory still does not exist, we create it if not os.path.exists(curdir): try: os.makedirs(curdir) except Exception, e: register_exception(req=req, alert_admin=True) return warningMsg(_("Unable to create a directory for this submission. The administrator has been alerted."), req, c, ln) ## Retrieve the previous page, as submitted to curdir (before we ## overwrite it with our curpage as declared from the incoming ## form) try: fp = open(os.path.join(curdir, "curpage")) previous_page_from_disk = fp.read() fp.close() except: previous_page_from_disk = "1" # retrieve the original main menu url and save it in the "mainmenu" file if mainmenu != "": fp = open(os.path.join(curdir, "mainmenu"), "w") fp.write(mainmenu) fp.close() # and if the file containing the URL to the main menu exists # we retrieve it and store it in the $mainmenu variable if os.path.exists(os.path.join(curdir, "mainmenu")): fp = open(os.path.join(curdir, "mainmenu"), "r"); mainmenu = fp.read() fp.close() else: mainmenu = "%s/submit" % (CFG_SITE_URL,) # various authentication related tasks... if uid_email != "guest" and uid_email != "": #First save the username (email address) in the SuE file. 
This way bibconvert will be able to use it if needed fp = open(os.path.join(curdir, "SuE"), "w") fp.write(uid_email) fp.close() if os.path.exists(os.path.join(curdir, "combo%s" % doctype)): fp = open(os.path.join(curdir, "combo%s" % doctype), "r"); categ = fp.read() fp.close() else: categ = req.form.get('combo%s' % doctype, '*') # is user authorized to perform this action? (auth_code, auth_message) = acc_authorize_action(req, 'submit', \ authorized_if_no_roles=not isGuestUser(uid), \ verbose=0, \ doctype=doctype, \ act=act, \ categ=categ) if not auth_code == 0: return warningMsg("""
    %s
    """ % auth_message, req) ## update the "journal of submission": ## Does the submission already exist in the log? submission_exists = \ submission_exists_in_log(doctype, act, access, uid_email) if submission_exists == 1: ## update the modification-date of this submission in the log: update_submission_modified_date_in_log(doctype, act, access, uid_email) else: ## Submission doesn't exist in log - create it: log_new_pending_submission(doctype, act, access, uid_email) ## Let's write in curdir file under curdir the curdir value ## in case e.g. it is needed in FFT. fp = open(os.path.join(curdir, "curdir"), "w") fp.write(curdir) fp.close() ## Let's write in ln file the current language fp = open(os.path.join(curdir, "ln"), "w") fp.write(ln) fp.close() # Save the form fields entered in the previous submission page # If the form was sent with the GET method form = dict(req.form) value = "" # we parse all the form variables for key, formfields in form.items(): filename = key.replace("[]", "") file_to_open = os.path.join(curdir, filename) try: assert(file_to_open == os.path.abspath(file_to_open)) except AssertionError: register_exception(req=req, prefix='curdir="%s", filename="%s"' % (curdir, filename)) return warningMsg(_("Invalid parameters"), req, c, ln) # Do not write reserved filenames to disk if filename in CFG_RESERVED_SUBMISSION_FILENAMES: # Unless there is really an element with that name on this # page or previous one (either visited, or declared to be # visited), which means that admin authorized it. 
if not ((str(curpage).isdigit() and \ filename in [submission_field[3] for submission_field in \ get_form_fields_on_submission_page(subname, curpage)]) or \ (str(curpage).isdigit() and int(curpage) > 1 and \ filename in [submission_field[3] for submission_field in \ get_form_fields_on_submission_page(subname, int(curpage) - 1)]) or \ (previous_page_from_disk.isdigit() and \ filename in [submission_field[3] for submission_field in \ get_form_fields_on_submission_page(subname, int(previous_page_from_disk))])): # Still this will filter out reserved field names that # might have been called by functions such as # Create_Modify_Interface function in MBI step, or # dynamic fields in response elements, but that is # unlikely to be a problem. continue # Skip variables containing characters that are not allowed in # WebSubmit elements if not string_is_alphanumeric_including_underscore(filename): continue # the field is an array if isinstance(formfields, types.ListType): fp = open(file_to_open, "w") for formfield in formfields: #stripslashes(value) value = specialchars(formfield) fp.write(value+"\n") fp.close() # the field is a normal string elif isinstance(formfields, types.StringTypes) and formfields != "": value = formfields fp = open(file_to_open, "w") fp.write(specialchars(value)) fp.close() # the field is a file elif hasattr(formfields,"filename") and formfields.filename: dir_to_open = os.path.join(curdir, 'files', key) try: assert(dir_to_open == os.path.abspath(dir_to_open)) assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) return warningMsg(_("Invalid parameters"), req, c, ln) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: register_exception(req=req, alert_admin=True) return warningMsg(_("Cannot create submission directory. 
The administrator has been alerted."), req, c, ln) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": fp = open(os.path.join(dir_to_open, filename), "w") while True: buf = formfields.file.read(10240) if buf: fp.write(buf) else: break fp.close() fp = open(os.path.join(curdir, "lastuploadedfile"), "w") fp.write(filename) fp.close() fp = open(file_to_open, "w") fp.write(filename) fp.close() else: return warningMsg(_("No file uploaded?"), req, c, ln) ## if the found field is the reference of the document, ## save this value in the "journal of submissions": if uid_email != "" and uid_email != "guest": if key == edsrn: update_submission_reference_in_log(doctype, access, uid_email, value) ## create the interface: subname = "%s%s" % (act, doctype) ## Get all of the form fields that appear on this page, ordered by fieldnum: form_fields = get_form_fields_on_submission_page(subname, curpage) full_fields = [] values = [] the_globals = { 'doctype' : doctype, 'action' : action, 'access' : access, 'ln' : ln, 'curdir' : curdir, 'uid' : uid, 'uid_email' : uid_email, 'form' : form, 'act' : act, 'action' : act, ## for backward compatibility 'req' : req, 'user_info' : user_info, 'InvenioWebSubmitFunctionError' : InvenioWebSubmitFunctionError, '__websubmit_in_jail__' : True, '__builtins__' : globals()['__builtins__'] } for field_instance in form_fields: full_field = {} ## Retrieve the field's description: element_descr = get_element_description(field_instance[3]) try: assert(element_descr is not None) except AssertionError: msg = _("Unknown form field found on submission page.") register_exception(req=req, alert_admin=True, prefix=msg) ## The form field doesn't seem to exist - return with error message: return warningMsg(_("Unknown form field found on submission page."), req, c, ln) if 
element_descr[8] is None: val = "" else: val = element_descr[8] ## we also retrieve and add the javascript code of the checking function, if needed ## Set it to empty string to begin with: full_field['javascript'] = '' if field_instance[7] != '': check_descr = get_element_check_description(field_instance[7]) if check_descr is not None: ## Retrieved the check description: full_field['javascript'] = check_descr full_field['type'] = element_descr[3] full_field['name'] = field_instance[3] full_field['rows'] = element_descr[5] full_field['cols'] = element_descr[6] full_field['val'] = val full_field['size'] = element_descr[4] full_field['maxlength'] = element_descr[7] full_field['htmlcode'] = element_descr[9] full_field['typename'] = field_instance[1] ## TODO: Investigate this, Not used? ## It also seems to refer to pagenum. # The 'R' fields must be executed in the engine's environment, # as the runtime functions access some global and local # variables. if full_field ['type'] == 'R': try: co = compile (full_field ['htmlcode'].replace("\r\n","\n"), "", "exec") the_globals['text'] = '' exec co in the_globals text = the_globals['text'] except: register_exception(req=req, alert_admin=True, prefix="Error in evaluating response element %s with globals %s" % (pprint.pformat(full_field), pprint.pformat(the_globals))) raise else: text = websubmit_templates.tmpl_submit_field (ln = ln, field = full_field) # we now determine the exact type of the created field if full_field['type'] not in [ 'D','R']: field.append(full_field['name']) level.append(field_instance[5]) fullDesc.append(field_instance[4]) txt.append(field_instance[6]) check.append(field_instance[7]) # If the field is not user-defined, we try to determine its type # (select, radio, file upload...) 
# check whether it is a select field or not if re.search("SELECT", text, re.IGNORECASE) is not None: select.append(1) else: select.append(0) # checks whether it is a radio field or not if re.search(r"TYPE=[\"']?radio", text, re.IGNORECASE) is not None: radio.append(1) else: radio.append(0) # checks whether it is a file upload or not if re.search(r"TYPE=[\"']?file", text, re.IGNORECASE) is not None: upload.append(1) else: upload.append(0) # if the field description contains the "" string, replace # it by the category selected on the document page submission page combofile = "combo%s" % doctype if os.path.exists("%s/%s" % (curdir, combofile)): f = open("%s/%s" % (curdir, combofile), "r") combo = f.read() f.close() else: combo="" text = text.replace("", combo) # if there is a tag in it, replace it by the current year year = time.strftime("%Y"); text = text.replace("", year) # if there is a tag in it, replace it by the current year today = time.strftime("%d/%m/%Y"); text = text.replace("", today) fieldhtml.append(text) else: select.append(0) radio.append(0) upload.append(0) # field.append(value) - initial version, not working with JS, taking a submitted value field.append(field_instance[3]) level.append(field_instance[5]) txt.append(field_instance[6]) fullDesc.append(field_instance[4]) check.append(field_instance[7]) fieldhtml.append(text) full_field['fullDesc'] = field_instance[4] full_field['text'] = text # If a file exists with the name of the field we extract the saved value text = '' if os.path.exists(os.path.join(curdir, full_field['name'])): file = open(os.path.join(curdir, full_field['name']), "r"); text = file.read() text = re.compile("[\n\r]*$").sub("", text) text = re.compile("\n").sub("\\n", text) text = re.compile("\r").sub("", text) file.close() values.append(text) full_fields.append(full_field) returnto = {} if int(curpage) == int(nbpages): subname = "%s%s" % (act, doctype) other_form_fields = \ get_form_fields_not_on_submission_page(subname, curpage) 
nbFields = 0 message = "" fullcheck_select = [] fullcheck_radio = [] fullcheck_upload = [] fullcheck_field = [] fullcheck_level = [] fullcheck_txt = [] fullcheck_noPage = [] fullcheck_check = [] for field_instance in other_form_fields: if field_instance[5] == "M": ## If this field is mandatory, get its description: element_descr = get_element_description(field_instance[3]) try: assert(element_descr is not None) except AssertionError: msg = _("Unknown form field found on submission page.") register_exception(req=req, alert_admin=True, prefix=msg) ## The form field doesn't seem to exist - return with error message: return warningMsg(_("Unknown form field found on submission page."), req, c, ln) if element_descr[3] in ['D', 'R']: if element_descr[3] == "D": text = element_descr[9] else: text = eval(element_descr[9]) formfields = text.split(">") for formfield in formfields: match = re.match("name=([^ <>]+)", formfield, re.IGNORECASE) if match is not None: names = match.groups for value in names: if value != "": value = re.compile("[\"']+").sub("", value) fullcheck_field.append(value) fullcheck_level.append(field_instance[5]) fullcheck_txt.append(field_instance[6]) fullcheck_noPage.append(field_instance[1]) fullcheck_check.append(field_instance[7]) nbFields = nbFields + 1 else: fullcheck_noPage.append(field_instance[1]) fullcheck_field.append(field_instance[3]) fullcheck_level.append(field_instance[5]) fullcheck_txt.append(field_instance[6]) fullcheck_check.append(field_instance[7]) nbFields = nbFields+1 # tests each mandatory field fld = 0 res = 1 for i in xrange(nbFields): res = 1 if not os.path.exists(os.path.join(curdir, fullcheck_field[i])): res=0 else: file = open(os.path.join(curdir, fullcheck_field[i]), "r") text = file.read() if text == '': res=0 else: if text == "Select:": res=0 if res == 0: fld = i break if not res: returnto = { 'field' : fullcheck_txt[fld], 'page' : fullcheck_noPage[fld], } t += websubmit_templates.tmpl_page_interface( ln = ln, docname = 
docname, actname = actname, curpage = curpage, nbpages = nbpages, nextPg = nextPg, access = access, nbPg = nbPg, doctype = doctype, act = act, fields = full_fields, javascript = websubmit_templates.tmpl_page_interface_js( ln = ln, upload = upload, field = field, fieldhtml = fieldhtml, txt = txt, check = check, level = level, curdir = curdir, values = values, select = select, radio = radio, curpage = curpage, nbpages = nbpages, returnto = returnto, ), mainmenu = mainmenu, ) t += websubmit_templates.tmpl_page_do_not_leave_submission_js(ln) # start display: req.content_type = "text/html" req.send_http_header() p_navtrail = """%(submit)s > %(docname)s """ % { 'submit' : _("Submit"), 'doctype' : quote_plus(doctype), 'docname' : docname, 'ln' : ln } return page(title= actname, body = t, navtrail = p_navtrail, description = "submit documents", keywords = "submit", uid = uid, language = ln, req = req, navmenuid='submit') def endaction(req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG, doctype="", act="", startPg=1, access="", mainmenu="", fromdir="", nextPg="", nbPg="", curpage=1, step=1, mode="U"): """Having filled-in the WebSubmit form created for metadata by the interface function, the user clicks a button to either "finish the submission" or to "proceed" to the next stage of the submission. At this point, a variable called "step" will be given a value of 1 or above, which means that this function is called by websubmit_webinterface. So, during all non-zero steps of the submission, this function is called. In other words, this function is called during the BACK-END phase of a submission, in which WebSubmit *functions* are being called. The function first ensures that all of the WebSubmit form field values have been saved in the current working submission directory, in text- files with the same name as the field elements have. It then determines the functions to be called for the given step of the submission, and executes them. 
Following this, if this is the last step of the submission, it logs the submission as "finished" in the journal of submissions. @param req: (apache request object) *** NOTE: Added into this object, is a variable called "form" (req.form). This is added into the object in the index function of websubmit_webinterface. It contains a "mod_python.util.FieldStorage" instance, that contains the form-fields found on the previous submission page. @param c: (string), defaulted to CFG_SITE_NAME. The name of the Invenio installation. @param ln: (string), defaulted to CFG_SITE_LANG. The language in which to display the pages. @param doctype: (string) - the doctype ID of the doctype for which the submission is being made. @param act: (string) - The ID of the action being performed (e.g. submission of bibliographic information; modification of bibliographic information, etc). @param startPg: (integer) - Starting page for the submission? Defaults to 1. @param indir: (string) - the directory used to store all submissions of the given "type" of this submission. For example, if the submission is of the type "modify bibliographic information", this variable would contain "modify". @param access: (string) - the "access" number for the submission (e.g. 1174062451_7010). This number is also used as the name for the current working submission directory. @param mainmenu: (string) - contains the URL (minus the Invenio home stem) for the submission's home-page. (E.g. If this submission is "PICT", the "mainmenu" file would contain "/submit?doctype=PICT". @param fromdir: @param nextPg: @param nbPg: @param curpage: (integer) - the current submission page number. Defaults to 1. @param step: (integer) - the current step of the submission. Defaults to 1. 
@param mode: """ # load the right message language _ = gettext_set_language(ln) dismode = mode ln = wash_language(ln) sys.stdout = req rn = "" t = "" # get user ID: uid = getUid(req) uid_email = get_email(uid) ## Get the submission storage directory from the DB: submission_dir = get_storage_directory_of_action(act) if submission_dir: indir = submission_dir else: ## Unable to determine the submission-directory: return warningMsg(_("Unable to find the submission directory for the action: %s") % escape(str(act)), req, c, ln) curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, indir, doctype, access) if os.path.exists(os.path.join(curdir, "combo%s" % doctype)): fp = open(os.path.join(curdir, "combo%s" % doctype), "r"); categ = fp.read() fp.close() else: categ = req.form.get('combo%s' % doctype, '*') # is user authorized to perform this action? (auth_code, auth_message) = acc_authorize_action(req, 'submit', \ authorized_if_no_roles=not isGuestUser(uid), \ verbose=0, \ doctype=doctype, \ act=act, \ categ=categ) if not auth_code == 0: return warningMsg("""
    %s
    """ % auth_message, req) # Preliminary tasks ## check we have minimum fields if not doctype or not act or not access: ## We don't have all the necessary information to go ahead ## with this submission: return warningMsg(_("Not enough information to go ahead with the submission."), req, c, ln) if doctype and act: ## Let's clean the input details = get_details_of_submission(doctype, act) if not details: return warningMsg(_("Invalid doctype and act parameters"), req, c, ln) doctype = details[0] act = details[1] try: assert(not access or re.match('\d+_\d+', access)) except AssertionError: register_exception(req=req, prefix='doctype="%s", access="%s"' % (doctype, access)) return warningMsg(_("Invalid parameters"), req, c, ln) ## Before continuing to process the submitted data, verify that ## this submission has not already been completed: if submission_is_finished(doctype, act, access, uid_email): ## This submission has already been completed. ## This situation can arise when, having completed a submission, ## the user uses the browser's back-button to go back to the form ## stage of the submission and then tries to submit once more. ## This is unsafe and should not be allowed. Instead of re-processing ## the submitted data, display an error message to the user: wrnmsg = """This submission has been completed. 
Please go to the""" \ """ """ \ """main menu to start a new submission.""" \ % { 'doctype' : quote_plus(doctype), 'ln' : ln } return warningMsg(wrnmsg, req) ## Get the number of pages for this submission: subname = "%s%s" % (act, doctype) ## retrieve the action and doctype data ## Get the submission storage directory from the DB: submission_dir = get_storage_directory_of_action(act) if submission_dir: indir = submission_dir else: ## Unable to determine the submission-directory: return warningMsg(_("Unable to find the submission directory for the action: %s") % escape(str(act)), req, c, ln) # The following words are reserved and should not be used as field names reserved_words = ["stop", "file", "nextPg", "startPg", "access", "curpage", "nbPg", "act", \ "indir", "doctype", "mode", "step", "deleted", "file_path", "userfile_name"] # This defines the path to the directory containing the action data curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, indir, doctype, access) try: assert(curdir == os.path.abspath(curdir)) except AssertionError: register_exception(req=req, prefix='indir="%s", doctype=%s, access=%s' % (indir, doctype, access)) return warningMsg(_("Invalid parameters"), req, c, ln) ## If the submission directory still does not exist, we create it if not os.path.exists(curdir): try: os.makedirs(curdir) except Exception, e: register_exception(req=req, alert_admin=True) return warningMsg(_("Unable to create a directory for this submission. 
The administrator has been alerted."), req, c, ln) # retrieve the original main menu url ans save it in the "mainmenu" file if mainmenu != "": fp = open(os.path.join(curdir, "mainmenu"), "w") fp.write(mainmenu) fp.close() # and if the file containing the URL to the main menu exists # we retrieve it and store it in the $mainmenu variable if os.path.exists(os.path.join(curdir, "mainmenu")): fp = open(os.path.join(curdir, "mainmenu"), "r"); mainmenu = fp.read() fp.close() else: mainmenu = "%s/submit" % (CFG_SITE_URL,) num_submission_pages = get_num_pages_of_submission(subname) if num_submission_pages is not None: nbpages = num_submission_pages else: ## Unable to determine the number of pages for this submission: return warningMsg(_("Unable to determine the number of submission pages."), \ req, CFG_SITE_NAME, ln) ## Retrieve the previous page, as submitted to curdir (before we ## overwrite it with our curpage as declared from the incoming ## form) try: fp = open(os.path.join(curdir, "curpage")) previous_page_from_disk = fp.read() fp.close() except: previous_page_from_disk = str(num_submission_pages) ## retrieve the name of the file in which the reference of ## the submitted document will be stored rn_filename = get_parameter_value_for_doctype(doctype, "edsrn") if rn_filename is not None: edsrn = rn_filename else: ## Unknown value for edsrn - set it to an empty string: edsrn = "" ## Determine whether the action is finished ## (ie there are no other steps after the current one): finished = function_step_is_last(doctype, act, step) ## Let's write in curdir file under curdir the curdir value ## in case e.g. it is needed in FFT. 
fp = open(os.path.join(curdir, "curdir"), "w") fp.write(curdir) fp.close() ## Let's write in ln file the current language fp = open(os.path.join(curdir, "ln"), "w") fp.write(ln) fp.close() # Save the form fields entered in the previous submission page # If the form was sent with the GET method form = req.form value = "" # we parse all the form variables for key in form.keys(): formfields = form[key] filename = key.replace("[]", "") file_to_open = os.path.join(curdir, filename) try: assert(file_to_open == os.path.abspath(file_to_open)) assert(file_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", filename="%s"' % (curdir, filename)) return warningMsg(_("Invalid parameters"), req, c, ln) # Do not write reserved filenames to disk if filename in CFG_RESERVED_SUBMISSION_FILENAMES: # Unless there is really an element with that name on this # page, or on the previously visited one, which means that # admin authorized it. Note that in endaction() curpage is # equivalent to the "previous" page value if not ((previous_page_from_disk.isdigit() and \ filename in [submission_field[3] for submission_field in \ get_form_fields_on_submission_page(subname, int(previous_page_from_disk))]) or \ (str(curpage).isdigit() and int(curpage) > 1 and \ filename in [submission_field[3] for submission_field in \ get_form_fields_on_submission_page(subname, int(curpage) - 1)])): # might have been called by functions such as # Create_Modify_Interface function in MBI step, or # dynamic fields in response elements, but that is # unlikely to be a problem. 
continue # Skip variables containing characters that are not allowed in # WebSubmit elements if not string_is_alphanumeric_including_underscore(filename): continue # the field is an array if isinstance(formfields,types.ListType): fp = open(file_to_open, "w") for formfield in formfields: #stripslashes(value) value = specialchars(formfield) fp.write(value+"\n") fp.close() # the field is a normal string elif isinstance(formfields, types.StringTypes) and formfields != "": value = formfields fp = open(file_to_open, "w") fp.write(specialchars(value)) fp.close() # the field is a file elif hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.join(curdir, 'files', key) try: assert(dir_to_open == os.path.abspath(dir_to_open)) assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) return warningMsg(_("Invalid parameters"), req, c, ln) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: register_exception(req=req, alert_admin=True) return warningMsg(_("Cannot create submission directory. The administrator has been alerted."), req, c, ln) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": fp = open(os.path.join(dir_to_open, filename), "w") while True: buf = formfields.file.read(10240) if buf: fp.write(buf) else: break fp.close() fp = open(os.path.join(curdir, "lastuploadedfile"), "w") fp.write(filename) fp.close() fp = open(file_to_open, "w") fp.write(filename) fp.close() else: return warningMsg(_("No file uploaded?"), req, c, ln) ## if the found field is the reference of the document ## we save this value in the "journal of submissions" if uid_email != "" and uid_email != "guest": if key == edsrn: update_submission_reference_in_log(doctype, access, uid_email, value) ## get the document type's long-name: doctype_lname = get_longname_of_doctype(doctype) if doctype_lname is not None: ## Got the doctype long-name: replace spaces with HTML chars: docname = doctype_lname.replace(" ", " ") else: ## Unknown document type: return warningMsg(_("Unknown document type"), req, c, ln) ## get the action's long-name: actname = get_longname_of_action(act) if actname is None: ## Unknown action: return warningMsg(_("Unknown action"), req, c, ln) ## Determine whether the action is finished ## (ie there are no other steps after the current one): last_step = function_step_is_last(doctype, act, step) next_action = '' ## The next action to be proposed to the user # Prints the action details, returning the mandatory score action_score = action_details(doctype, act) current_level = get_level(doctype, act) # Calls all the function's actions function_content = '' try: ## Handle the execution of the functions for this ## submission/step: start_time = time.time() (function_content, last_step, action_score, rn) = \ print_function_calls(req=req, doctype=doctype, action=act, step=step, form=form, start_time=start_time, access=access, curdir=curdir, dismode=mode, rn=rn, last_step=last_step, action_score=action_score, ln=ln) except InvenioWebSubmitFunctionError, e: 
register_exception(req=req, alert_admin=True, prefix='doctype="%s", action="%s", step="%s", form="%s", start_time="%s"' % (doctype, act, step, form, start_time)) ## There was a serious function-error. Execution ends. if CFG_DEVEL_SITE: raise else: return warningMsg(_("A serious function-error has been encountered. Adminstrators have been alerted.
    Please not that this might be due to wrong characters inserted into the form (e.g. by copy and pasting some text from a PDF file)."), req, c, ln) except InvenioWebSubmitFunctionStop, e: ## For one reason or another, one of the functions has determined that ## the data-processing phase (i.e. the functions execution) should be ## halted and the user should be returned to the form interface once ## more. (NOTE: Redirecting the user to the Web-form interface is ## currently done using JavaScript. The "InvenioWebSubmitFunctionStop" ## exception contains a "value" string, which is effectively JavaScript ## - probably an alert box and a form that is submitted). **THIS WILL ## CHANGE IN THE FUTURE WHEN JavaScript IS REMOVED!** if e.value is not None: function_content = e.value else: function_content = e else: ## No function exceptions (InvenioWebSubmitFunctionStop, ## InvenioWebSubmitFunctionError) were raised by the functions. Propose ## the next action (if applicable), and log the submission as finished: ## If the action was mandatory we propose the next ## mandatory action (if any) if action_score != -1 and last_step == 1: next_action = Propose_Next_Action(doctype, \ action_score, \ access, \ current_level, \ indir) ## If we are in the last step of an action, we can update ## the "journal of submissions" if last_step == 1: if uid_email != "" and uid_email != "guest": ## update the "journal of submission": ## Does the submission already exist in the log? 
submission_exists = \ submission_exists_in_log(doctype, act, access, uid_email) if submission_exists == 1: ## update the rn and status to finished for this submission ## in the log: update_submission_reference_and_status_in_log(doctype, \ act, \ access, \ uid_email, \ rn, \ "finished") else: ## Submission doesn't exist in log - create it: log_new_completed_submission(doctype, \ act, \ access, \ uid_email, \ rn) ## Having executed the functions, create the page that will be displayed ## to the user: t = websubmit_templates.tmpl_page_endaction( ln = ln, # these fields are necessary for the navigation nextPg = nextPg, startPg = startPg, access = access, curpage = curpage, nbPg = nbPg, nbpages = nbpages, doctype = doctype, act = act, docname = docname, actname = actname, mainmenu = mainmenu, finished = finished, function_content = function_content, next_action = next_action, ) if finished: # register event in webstat try: register_customevent("websubmissions", [get_longname_of_doctype(doctype)]) except: register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'") else: t += websubmit_templates.tmpl_page_do_not_leave_submission_js(ln) # start display: req.content_type = "text/html" req.send_http_header() p_navtrail = '' + _("Submit") +\ """ > %(docname)s""" % { 'doctype' : quote_plus(doctype), 'docname' : docname, 'ln' : ln, } return page(title= actname, body = t, navtrail = p_navtrail, description="submit documents", keywords="submit", uid = uid, language = ln, req = req, navmenuid='submit') def home(req, catalogues_text, c=CFG_SITE_NAME, ln=CFG_SITE_LANG): """This function generates the WebSubmit "home page". Basically, this page contains a list of submission-collections in WebSubmit, and gives links to the various document-type submissions. Document-types only appear on this page when they have been connected to a submission-collection in WebSubmit. 
@param req: (apache request object) @param catalogues_text (string): the computed catalogues tree @param c: (string) - defaults to CFG_SITE_NAME @param ln: (string) - The Invenio interface language of choice. Defaults to CFG_SITE_LANG (the default language of the installation). @return: (string) - the Web page to be displayed. """ ln = wash_language(ln) # get user ID: try: uid = getUid(req) except Error, e: return errorMsg(e, req, c, ln) # load the right message language _ = gettext_set_language(ln) finaltext = websubmit_templates.tmpl_submit_home_page( ln = ln, catalogues = catalogues_text ) return page(title=_("Submit"), body=finaltext, navtrail=[], description="submit documents", keywords="submit", uid=uid, language=ln, req=req, navmenuid='submit' ) def makeCataloguesTable(req, ln=CFG_SITE_LANG): """Build the 'catalogues' (submission-collections) tree for the WebSubmit home-page. This tree contains the links to the various document types in WebSubmit. @param req: (dict) - the user request object in order to decide whether to display a submission. @param ln: (string) - the language of the interface. (defaults to 'CFG_SITE_LANG'). @return: (string, bool, bool) - the submission-collections tree. True if there is at least one submission authorized for the user True if there is at least one submission """ def is_at_least_one_submission_authorized(cats): for cat in cats: if cat['docs']: return True if is_at_least_one_submission_authorized(cat['sons']): return True return False text = "" catalogues = [] ## Get the submission-collections attached at the top level ## of the submission-collection tree: top_level_collctns = get_collection_children_of_submission_collection(0) if len(top_level_collctns) != 0: ## There are submission-collections attatched to the top level. 
## retrieve their details for displaying: for child_collctn in top_level_collctns: catalogues.append(getCatalogueBranch(child_collctn[0], 1, req)) text = websubmit_templates.tmpl_submit_home_catalogs( ln=ln, catalogs=catalogues) submissions_exist = True at_least_one_submission_authorized = is_at_least_one_submission_authorized(catalogues) else: text = websubmit_templates.tmpl_submit_home_catalog_no_content(ln=ln) submissions_exist = False at_least_one_submission_authorized = False return text, at_least_one_submission_authorized, submissions_exist def getCatalogueBranch(id_father, level, req): """Build up a given branch of the submission-collection tree. I.e. given a parent submission-collection ID, build up the tree below it. This tree will include doctype-children, as well as other submission- collections and their children. Finally, return the branch as a dictionary. @param id_father: (integer) - the ID of the submission-collection from which to begin building the branch. @param level: (integer) - the level of the current submission- collection branch. @param req: (dict) - the user request object in order to decide whether to display a submission. @return: (dictionary) - the branch and its sub-branches. """ elem = {} ## The dictionary to contain this branch of the tree. 
## First, get the submission-collection-details: collctn_name = get_submission_collection_name(id_father) if collctn_name is not None: ## Got the submission-collection's name: elem['name'] = collctn_name else: ## The submission-collection is unknown to the DB ## set its name as empty: elem['name'] = "" elem['id'] = id_father elem['level'] = level ## Now get details of the doctype-children of this ## submission-collection: elem['docs'] = [] ## List to hold the doctype-children ## of the submission-collection doctype_children = \ get_doctype_children_of_submission_collection(id_father) user_info = collect_user_info(req) for child_doctype in doctype_children: ## To get access to a submission pipeline for a logged in user, ## it is decided by any authorization. If none are defined for the action ## then a logged in user will get access. ## If user is not logged in, a specific rule to allow the action is needed if acc_authorize_action(req, 'submit', \ authorized_if_no_roles=not isGuestUser(user_info['uid']), \ doctype=child_doctype[0])[0] == 0: elem['docs'].append(getDoctypeBranch(child_doctype[0])) ## Now, get the collection-children of this submission-collection: elem['sons'] = [] collctn_children = \ get_collection_children_of_submission_collection(id_father) for child_collctn in collctn_children: elem['sons'].append(getCatalogueBranch(child_collctn[0], level + 1, req)) ## Now return this branch of the built-up 'collection-tree': return elem def getDoctypeBranch(doctype): """Create a document-type 'leaf-node' for the submission-collections tree. Basically, this leaf is a dictionary containing the name and ID of the document-type submission to which it links. @param doctype: (string) - the ID of the document type. @return: (dictionary) - the document-type 'leaf node'. Contains the following values: + id: (string) - the document-type ID. + name: (string) - the (long) name of the document-type. 
""" ldocname = get_longname_of_doctype(doctype) if ldocname is None: ldocname = "Unknown Document Type" return { 'id' : doctype, 'name' : ldocname, } def displayCatalogueBranch(id_father, level, catalogues): text = "" collctn_name = get_submission_collection_name(id_father) if collctn_name is None: ## If this submission-collection wasn't known in the DB, ## give it the name "Unknown Submission-Collection" to ## avoid errors: collctn_name = "Unknown Submission-Collection" ## Now, create the display for this submission-collection: if level == 1: text = "
  • %s\n" \ % collctn_name else: ## TODO: These are the same (and the if is ugly.) Why? if level == 2: text = "
  • %s\n" % collctn_name else: if level > 2: text = "
  • %s\n" % collctn_name ## Now display the children document-types that are attached ## to this submission-collection: ## First, get the children: doctype_children = get_doctype_children_of_submission_collection(id_father) collctn_children = get_collection_children_of_submission_collection(id_father) if len(doctype_children) > 0 or len(collctn_children) > 0: ## There is something to display, so open a list: text = text + "
      \n" ## First, add the doctype leaves of this branch: for child_doctype in doctype_children: ## Add the doctype 'leaf-node': text = text + displayDoctypeBranch(child_doctype[0], catalogues) ## Now add the submission-collection sub-branches: for child_collctn in collctn_children: catalogues.append(child_collctn[0]) text = text + displayCatalogueBranch(child_collctn[0], level+1, catalogues) ## Finally, close up the list if there were nodes to display ## at this branch: if len(doctype_children) > 0 or len(collctn_children) > 0: text = text + "
    \n" return text def displayDoctypeBranch(doctype, catalogues): text = "" ldocname = get_longname_of_doctype(doctype) if ldocname is None: ldocname = "Unknown Document Type" text = "
  • %s\n" \ % (doctype, doctype, doctype, ldocname) return text def action(req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG, doctype=""): # load the right message language _ = gettext_set_language(ln) nbCateg = 0 snameCateg = [] lnameCateg = [] actionShortDesc = [] indir = [] actionbutton = [] statustext = [] t = "" ln = wash_language(ln) # get user ID: try: uid = getUid(req) except Error, e: return errorMsg(e, req, c, ln) #parses database to get all data ## first, get the list of categories doctype_categs = get_categories_of_doctype(doctype) for doctype_categ in doctype_categs: if not acc_authorize_action(req, 'submit', \ authorized_if_no_roles=not isGuestUser(uid), \ verbose=0, \ doctype=doctype, \ categ=doctype_categ[0])[0] == 0: # This category is restricted for this user, move on to the next categories. continue nbCateg = nbCateg+1 snameCateg.append(doctype_categ[0]) lnameCateg.append(doctype_categ[1]) ## Now get the details of the document type: doctype_details = get_doctype_details(doctype) if doctype_details is None: ## Doctype doesn't exist - raise error: return warningMsg(_("Unable to find document type: %s") % escape(str(doctype)), req, c, ln) else: docFullDesc = doctype_details[0] # Also update the doctype as returned by the database, since # it might have a differnent case (eg. DemOJrN->demoJRN) doctype = docShortDesc = doctype_details[1] description = doctype_details[4] ## Get the details of the actions supported by this document-type: doctype_actions = get_actions_on_submission_page_for_doctype(doctype) for doctype_action in doctype_actions: if not acc_authorize_action(req, 'submit', \ authorized_if_no_roles=not isGuestUser(uid), \ doctype=doctype, \ act=doctype_action[0])[0] == 0: # This action is not authorized for this user, move on to the next actions. 
continue ## Get the details of this action: action_details = get_action_details(doctype_action[0]) if action_details is not None: actionShortDesc.append(doctype_action[0]) indir.append(action_details[1]) actionbutton.append(action_details[4]) statustext.append(action_details[5]) if not snameCateg and not actionShortDesc: return page_not_authorized(req, "../submit", uid=uid, navmenuid='submit') ## Send the gathered information to the template so that the doctype's ## home-page can be displayed: t = websubmit_templates.tmpl_action_page( ln=ln, uid=uid, pid = os.getpid(), now = time.time(), doctype = doctype, description = description, docfulldesc = docFullDesc, snameCateg = snameCateg, lnameCateg = lnameCateg, actionShortDesc = actionShortDesc, indir = indir, # actionbutton = actionbutton, statustext = statustext, ) p_navtrail = """%(submit)s""" % {'submit' : _("Submit"), 'ln' : ln} return page(title = docFullDesc, body=t, navtrail=p_navtrail, description="submit documents", keywords="submit", uid=uid, language=ln, req=req, navmenuid='submit' ) def Request_Print(m, txt): """The argumemts to this function are the display mode (m) and the text to be displayed (txt). """ return txt def Evaluate_Parameter (field, doctype): # Returns the literal value of the parameter. Assumes that the value is # uniquely determined by the doctype, i.e. doctype is the primary key in # the table # If the table name is not null, evaluate the parameter ## TODO: The above comment looks like nonesense? This ## function only seems to get the values of parameters ## from the db... ## Get the value for the parameter: param_val = get_parameter_value_for_doctype(doctype, field) if param_val is None: ## Couldn't find a value for this parameter for this doctype. ## Instead, try with the default doctype (DEF): param_val = get_parameter_value_for_doctype("DEF", field) if param_val is None: ## There was no value for the parameter for the default doctype. 
## Nothing can be done about it - return an empty string: return "" else: ## There was some kind of value for the parameter; return it: return param_val def Get_Parameters (function, doctype): """For a given function of a given document type, a dictionary of the parameter names and values are returned. @param function: (string) - the name of the function for which the parameters are to be retrieved. @param doctype: (string) - the ID of the document type. @return: (dictionary) - of the parameters of the function. Keyed by the parameter name, values are of course the parameter values. """ parray = {} ## Get the names of the parameters expected by this function: func_params = get_parameters_of_function(function) for func_param in func_params: ## For each of the parameters, get its value for this document- ## type and add it into the dictionary of parameters: parameter = func_param[0] parray[parameter] = Evaluate_Parameter (parameter, doctype) return parray def get_level(doctype, action): """Get the level of a given submission. If unknown, return 0 as the level. @param doctype: (string) - the ID of the document type. @param action: (string) - the ID of the action. @return: (integer) - the level of the submission; 0 otherwise. """ subm_details = get_details_of_submission(doctype, action) if subm_details is not None: ## Return the level of this action subm_level = subm_details[9] try: int(subm_level) except ValueError: return 0 else: return subm_level else: return 0 def action_details (doctype, action): # Prints whether the action is mandatory or optional. 
The score of the # action is returned (-1 if the action was optional) subm_details = get_details_of_submission(doctype, action) if subm_details is not None: if subm_details[9] != "0": ## This action is mandatory; return the score: return subm_details[10] else: return -1 else: return -1 def print_function_calls(req, doctype, action, step, form, start_time, access, curdir, dismode, rn, last_step, action_score, ln=CFG_SITE_LANG): """ Calls the functions required by an 'action' action on a 'doctype' document In supervisor mode, a table of the function calls is produced @return: (function_output_string, last_step, action_score, rn) """ user_info = collect_user_info(req) # load the right message language _ = gettext_set_language(ln) t = "" ## Here follows the global protect environment. the_globals = { 'doctype' : doctype, 'action' : action, 'act' : action, ## for backward compatibility 'step' : step, 'access' : access, 'ln' : ln, 'curdir' : curdir, 'uid' : user_info['uid'], 'uid_email' : user_info['email'], 'rn' : rn, 'last_step' : last_step, 'action_score' : action_score, '__websubmit_in_jail__' : True, 'form' : form, 'user_info' : user_info, '__builtins__' : globals()['__builtins__'], 'Request_Print': Request_Print } ## Get the list of functions to be called funcs_to_call = get_functions_for_submission_step(doctype, action, step) ## If no functions are found at this step for this doctype, ## get the functions for the DEF(ault) doctype: if len(funcs_to_call) == 0: funcs_to_call = get_functions_for_submission_step("DEF", action, step) if len(funcs_to_call) > 0: # while there are functions left... 
functions = [] for function in funcs_to_call: try: function_name = function[0] function_score = function[1] currfunction = { 'name' : function_name, 'score' : function_score, 'error' : 0, 'text' : '', } if os.path.exists("%s/invenio/websubmit_functions/%s.py" % (CFG_PYLIBDIR, function_name)): # import the function itself #function = getattr(invenio.websubmit_functions, function_name) execfile("%s/invenio/websubmit_functions/%s.py" % (CFG_PYLIBDIR, function_name), the_globals) if function_name not in the_globals: currfunction['error'] = 1 else: the_globals['function'] = the_globals[function_name] # Evaluate the parameters, and place them in an array the_globals['parameters'] = Get_Parameters(function_name, doctype) # Call function: log_function(curdir, "Start %s" % function_name, start_time) try: try: ## Attempt to call the function with 4 arguments: ## ("parameters", "curdir" and "form" as usual), ## and "user_info" - the dictionary of user ## information: ## ## Note: The function should always be called with ## these keyword arguments because the "TypeError" ## except clause checks for a specific mention of ## the 'user_info' keyword argument when a legacy ## function (one that accepts only 'parameters', ## 'curdir' and 'form') has been called and if ## the error string doesn't contain this, ## the TypeError will be considered as a something ## that was incorrectly handled in the function and ## will be propagated as an ## InvenioWebSubmitFunctionError instead of the ## function being called again with the legacy 3 ## arguments. func_returnval = eval("function(parameters=parameters, curdir=curdir, form=form, user_info=user_info)", the_globals) except TypeError, err: ## If the error contains the string "got an ## unexpected keyword argument", it means that the ## function doesn't accept the "user_info" ## argument. 
Test for this: if "got an unexpected keyword argument 'user_info'" in \ str(err).lower(): ## As expected, the function doesn't accept ## the user_info keyword argument. Call it ## again with the legacy 3 arguments ## (parameters, curdir, form): func_returnval = eval("function(parameters=parameters, curdir=curdir, form=form)", the_globals) else: ## An unexpected "TypeError" was caught. ## It looks as though the function itself didn't ## handle something correctly. ## Convert this error into an ## InvenioWebSubmitFunctionError and raise it: msg = "Unhandled TypeError caught when " \ "calling [%s] WebSubmit function: " \ "[%s]" % (function_name, str(err)) raise InvenioWebSubmitFunctionError(msg) except InvenioWebSubmitFunctionWarning, err: ## There was an unexpected behaviour during the ## execution. Log the message into function's log ## and go to next function log_function(curdir, "***Warning*** from %s: %s" \ % (function_name, str(err)), start_time) ## Reset "func_returnval" to None: func_returnval = None register_exception(req=req, alert_admin=True, prefix="Warning in executing function %s with globals %s" % (pprint.pformat(currfunction), pprint.pformat(the_globals))) log_function(curdir, "End %s" % function_name, start_time) if func_returnval is not None: ## Append the returned value as a string: currfunction['text'] = str(func_returnval) else: ## The function the NoneType. Don't keep that value as ## the currfunction->text. Replace it with the empty ## string. currfunction['text'] = "" else: currfunction['error'] = 1 functions.append(currfunction) except InvenioWebSubmitFunctionStop, err: ## The submission asked to stop execution. This is ## ok. 
Do not alert admin, and raise exception further log_function(curdir, "***Stop*** from %s: %s" \ % (function_name, str(err)), start_time) raise except: register_exception(req=req, alert_admin=True, prefix="Error in executing function %s with globals %s" % (pprint.pformat(currfunction), pprint.pformat(the_globals))) raise t = websubmit_templates.tmpl_function_output( ln = ln, display_on = (dismode == 'S'), action = action, doctype = doctype, step = step, functions = functions, ) else : if dismode == 'S': t = "

    " + _("The chosen action is not supported by the document type.") + "" return (t, the_globals['last_step'], the_globals['action_score'], the_globals['rn']) def Propose_Next_Action (doctype, action_score, access, currentlevel, indir, ln=CFG_SITE_LANG): t = "" next_submissions = \ get_submissions_at_level_X_with_score_above_N(doctype, currentlevel, action_score) if len(next_submissions) > 0: actions = [] first_score = next_submissions[0][10] for action in next_submissions: if action[10] == first_score: ## Get the submission directory of this action: nextdir = get_storage_directory_of_action(action[1]) if nextdir is None: nextdir = "" curraction = { 'page' : action[11], 'action' : action[1], 'doctype' : doctype, 'nextdir' : nextdir, 'access' : access, 'indir' : indir, 'name' : action[12], } actions.append(curraction) t = websubmit_templates.tmpl_next_action( ln = ln, actions = actions, ) return t def specialchars(text): text = string.replace(text, "“", "\042"); text = string.replace(text, "”", "\042"); text = string.replace(text, "’", "\047"); text = string.replace(text, "—", "\055"); text = string.replace(text, "…", "\056\056\056"); return text def log_function(curdir, message, start_time, filename="function_log"): """Write into file the message and the difference of time between starttime and current time @param curdir:(string) path to the destination dir @param message: (string) message to write into the file @param starttime: (float) time to compute from @param filname: (string) name of log file """ time_lap = "%.3f" % (time.time() - start_time) if os.access(curdir, os.F_OK|os.W_OK): fd = open("%s/%s" % (curdir, filename), "a+") fd.write("""%s --- %s\n""" % (message, time_lap)) fd.close() ## FIXME: Duplicated def errorMsg(title, req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) return page(title = _("Error"), body = create_error_box(req, title=title, verbose=0, ln=ln), description="%s - Internal Error" % c, 
keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='submit') def warningMsg(title, req, c=CFG_SITE_NAME, ln=CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) return page(title = _("Warning"), body = title, description="%s - Warning" % c, keywords="%s, Warning" % c, uid = getUid(req), language=ln, req=req, navmenuid='submit') diff --git a/modules/websubmit/lib/websubmit_file_converter.py b/modules/websubmit/lib/websubmit_file_converter.py index 97627c4ce..e57752856 100644 --- a/modules/websubmit/lib/websubmit_file_converter.py +++ b/modules/websubmit/lib/websubmit_file_converter.py @@ -1,1462 +1,1462 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ This module implement fulltext conversion between many different file formats. 
""" import os import stat import re import sys import shutil import tempfile import HTMLParser import time import subprocess import atexit import signal import threading from logging import DEBUG, getLogger from htmlentitydefs import entitydefs from optparse import OptionParser try: from invenio.hocrlib import create_pdf, extract_hocr, CFG_PPM_RESOLUTION from pyPdf import PdfFileReader, PdfFileWriter CFG_CAN_DO_OCR = True except ImportError: CFG_CAN_DO_OCR = False from invenio.textutils import wrap_text_in_a_box from invenio.shellutils import run_process_with_timeout, run_shell_command from invenio.config import CFG_TMPDIR, CFG_ETCDIR, CFG_PYLIBDIR, \ CFG_PATH_ANY2DJVU, \ CFG_PATH_PDFINFO, \ CFG_PATH_GS, \ CFG_PATH_PDFOPT, \ CFG_PATH_PDFTOPS, \ CFG_PATH_GZIP, \ CFG_PATH_GUNZIP, \ CFG_PATH_PDFTOTEXT, \ CFG_PATH_PDFTOPPM, \ CFG_PATH_OCROSCRIPT, \ CFG_PATH_DJVUPS, \ CFG_PATH_DJVUTXT, \ CFG_PATH_OPENOFFICE_PYTHON, \ CFG_PATH_PSTOTEXT, \ CFG_PATH_TIFF2PDF, \ CFG_PATH_PS2PDF, \ CFG_OPENOFFICE_SERVER_HOST, \ CFG_OPENOFFICE_SERVER_PORT, \ CFG_OPENOFFICE_USER, \ CFG_PATH_CONVERT, \ CFG_PATH_PAMFILE, \ CFG_BINDIR, \ CFG_LOGDIR, \ CFG_BIBSCHED_PROCESS_USER, \ CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM, \ - CFG_WEBSUBMIT_DESIRED_CONVERSIONS + CFG_BIBDOCFILE_DESIRED_CONVERSIONS from invenio.errorlib import register_exception def get_file_converter_logger(): return getLogger("InvenioWebSubmitFileConverterLogger") CFG_TWO2THREE_LANG_CODES = { 'en': 'eng', 'nl': 'nld', 'es': 'spa', 'de': 'deu', 'it': 'ita', 'fr': 'fra', } CFG_OPENOFFICE_TMPDIR = os.path.join(CFG_TMPDIR, 'ooffice-tmp-files') CFG_GS_MINIMAL_VERSION_FOR_PDFA = "8.65" CFG_GS_MINIMAL_VERSION_FOR_PDFX = "8.52" CFG_ICC_PATH = os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'ISOCoatedsb.icc') CFG_PDFA_DEF_PATH = os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'PDFA_def.ps') CFG_PDFX_DEF_PATH = os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'PDFX_def.ps') 
CFG_UNOCONV_LOG_PATH = os.path.join(CFG_LOGDIR, 'unoconv.log') _RE_CLEAN_SPACES = re.compile(r'\s+') class InvenioWebSubmitFileConverterError(Exception): pass def get_conversion_map(): """Return a dictionary of the form: '.pdf' : {'.ps.gz' : ('pdf2ps', {param1 : value1...}) """ ret = { '.csv': {}, '.djvu': {}, '.doc': {}, '.docx': {}, '.sxw': {}, '.htm': {}, '.html': {}, '.odp': {}, '.ods': {}, '.odt': {}, '.pdf': {}, '.ppt': {}, '.pptx': {}, '.sxi': {}, '.ps': {}, '.ps.gz': {}, '.rtf': {}, '.tif': {}, '.tiff': {}, '.txt': {}, '.xls': {}, '.xlsx': {}, '.sxc': {}, '.xml': {}, '.hocr': {}, '.pdf;pdfa': {}, '.asc': {}, } if CFG_PATH_GZIP: ret['.ps']['.ps.gz'] = (gzip, {}) if CFG_PATH_GUNZIP: ret['.ps.gz']['.ps'] = (gunzip, {}) if CFG_PATH_ANY2DJVU: ret['.pdf']['.djvu'] = (any2djvu, {}) ret['.ps']['.djvu'] = (any2djvu, {}) if CFG_PATH_DJVUPS: ret['.djvu']['.ps'] = (djvu2ps, {'compress': False}) if CFG_PATH_GZIP: ret['.djvu']['.ps.gz'] = (djvu2ps, {'compress': True}) if CFG_PATH_DJVUTXT: ret['.djvu']['.txt'] = (djvu2text, {}) if CFG_PATH_PSTOTEXT: ret['.ps']['.txt'] = (pstotext, {}) if CFG_PATH_GUNZIP: ret['.ps.gz']['.txt'] = (pstotext, {}) if can_pdfa(): ret['.ps']['.pdf;pdfa'] = (ps2pdfa, {}) ret['.pdf']['.pdf;pdfa'] = (pdf2pdfa, {}) if CFG_PATH_GUNZIP: ret['.ps.gz']['.pdf;pdfa'] = (ps2pdfa, {}) else: if CFG_PATH_PS2PDF: ret['.ps']['.pdf;pdfa'] = (ps2pdf, {}) if CFG_PATH_GUNZIP: ret['.ps.gz']['.pdf'] = (ps2pdf, {}) if can_pdfx(): ret['.ps']['.pdf;pdfx'] = (ps2pdfx, {}) ret['.pdf']['.pdf;pdfx'] = (pdf2pdfx, {}) if CFG_PATH_GUNZIP: ret['.ps.gz']['.pdf;pdfx'] = (ps2pdfx, {}) if CFG_PATH_PDFTOPS: ret['.pdf']['.ps'] = (pdf2ps, {'compress': False}) ret['.pdf;pdfa']['.ps'] = (pdf2ps, {'compress': False}) if CFG_PATH_GZIP: ret['.pdf']['.ps.gz'] = (pdf2ps, {'compress': True}) ret['.pdf;pdfa']['.ps.gz'] = (pdf2ps, {'compress': True}) if CFG_PATH_PDFTOTEXT: ret['.pdf']['.txt'] = (pdf2text, {}) ret['.pdf;pdfa']['.txt'] = (pdf2text, {}) ret['.asc']['.txt'] = (txt2text, {}) 
ret['.txt']['.txt'] = (txt2text, {}) ret['.csv']['.txt'] = (txt2text, {}) ret['.html']['.txt'] = (html2text, {}) ret['.htm']['.txt'] = (html2text, {}) ret['.xml']['.txt'] = (html2text, {}) if CFG_PATH_TIFF2PDF: ret['.tiff']['.pdf'] = (tiff2pdf, {}) ret['.tif']['.pdf'] = (tiff2pdf, {}) if CFG_PATH_OPENOFFICE_PYTHON and CFG_OPENOFFICE_SERVER_HOST: ret['.rtf']['.odt'] = (unoconv, {'output_format': 'odt'}) ret['.rtf']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.rtf']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.rtf']['.docx'] = (unoconv, {'output_format': 'docx'}) ret['.doc']['.odt'] = (unoconv, {'output_format': 'odt'}) ret['.doc']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.doc']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.doc']['.docx'] = (unoconv, {'output_format': 'docx'}) ret['.docx']['.odt'] = (unoconv, {'output_format': 'odt'}) ret['.docx']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.docx']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.sxw']['.odt'] = (unoconv, {'output_format': 'odt'}) ret['.sxw']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.sxw']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.docx']['.docx'] = (unoconv, {'output_format': 'docx'}) ret['.odt']['.doc'] = (unoconv, {'output_format': 'doc'}) ret['.odt']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.odt']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.odt']['.docx'] = (unoconv, {'output_format': 'docx'}) ret['.ppt']['.odp'] = (unoconv, {'output_format': 'odp'}) ret['.ppt']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.ppt']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.ppt']['.pptx'] = (unoconv, {'output_format': 'pptx'}) ret['.pptx']['.odp'] = (unoconv, {'output_format': 'odp'}) ret['.pptx']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.pptx']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.sxi']['.odp'] = (unoconv, {'output_format': 'odp'}) ret['.sxi']['.pdf;pdfa'] = 
(unoconv, {'output_format': 'pdf'}) ret['.sxi']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.sxi']['.pptx'] = (unoconv, {'output_format': 'pptx'}) ret['.odp']['.ppt'] = (unoconv, {'output_format': 'ppt'}) ret['.odp']['.pptx'] = (unoconv, {'output_format': 'pptx'}) ret['.odp']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.odp']['.txt'] = (unoconv, {'output_format': 'txt'}) ret['.odp']['.pptx'] = (unoconv, {'output_format': 'pptx'}) ret['.xls']['.ods'] = (unoconv, {'output_format': 'ods'}) ret['.xls']['.xlsx'] = (unoconv, {'output_format': 'xslx'}) ret['.xlsx']['.ods'] = (unoconv, {'output_format': 'ods'}) ret['.sxc']['.ods'] = (unoconv, {'output_format': 'ods'}) ret['.sxc']['.xlsx'] = (unoconv, {'output_format': 'xslx'}) ret['.ods']['.xls'] = (unoconv, {'output_format': 'xls'}) ret['.ods']['.pdf;pdfa'] = (unoconv, {'output_format': 'pdf'}) ret['.ods']['.csv'] = (unoconv, {'output_format': 'csv'}) ret['.ods']['.xlsx'] = (unoconv, {'output_format': 'xslx'}) ret['.csv']['.txt'] = (txt2text, {}) ## Let's add all the existing output formats as potential input formats. for value in ret.values(): for key in value.keys(): if key not in ret: ret[key] = {} return ret def get_best_format_to_extract_text_from(filelist, best_formats=CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM): """ Return among the filelist the best file whose format is best suited for extracting text. 
""" from invenio.bibdocfile import decompose_file, normalize_format best_formats = [normalize_format(aformat) for aformat in best_formats if can_convert(aformat, '.txt')] for aformat in best_formats: for filename in filelist: if decompose_file(filename, skip_version=True)[2].endswith(aformat): return filename raise InvenioWebSubmitFileConverterError("It's not possible to extract valuable text from any of the proposed files.") def get_missing_formats(filelist, desired_conversion=None): """Given a list of files it will return a dictionary of the form: file1 : missing formats to generate from it... """ from invenio.bibdocfile import normalize_format, decompose_file def normalize_desired_conversion(): ret = {} for key, value in desired_conversion.iteritems(): ret[normalize_format(key)] = [normalize_format(aformat) for aformat in value] return ret if desired_conversion is None: - desired_conversion = CFG_WEBSUBMIT_DESIRED_CONVERSIONS + desired_conversion = CFG_BIBDOCFILE_DESIRED_CONVERSIONS available_formats = [decompose_file(filename, skip_version=True)[2] for filename in filelist] missing_formats = [] desired_conversion = normalize_desired_conversion() ret = {} for filename in filelist: aformat = decompose_file(filename, skip_version=True)[2] if aformat in desired_conversion: for desired_format in desired_conversion[aformat]: if desired_format not in available_formats and desired_format not in missing_formats: missing_formats.append(desired_format) if filename not in ret: ret[filename] = [] ret[filename].append(desired_format) return ret def can_convert(input_format, output_format, max_intermediate_conversions=4): """Return the chain of conversion to transform input_format into output_format, if any.""" from invenio.bibdocfile import normalize_format if max_intermediate_conversions <= 0: return [] input_format = normalize_format(input_format) output_format = normalize_format(output_format) if input_format in __CONVERSION_MAP: if output_format in 
__CONVERSION_MAP[input_format]: return [__CONVERSION_MAP[input_format][output_format]] best_res = [] best_intermediate = '' for intermediate_format in __CONVERSION_MAP[input_format]: res = can_convert(intermediate_format, output_format, max_intermediate_conversions-1) if res and (len(res) < best_res or not best_res): best_res = res best_intermediate = intermediate_format if best_res: return [__CONVERSION_MAP[input_format][best_intermediate]] + best_res return [] def can_pdfopt(verbose=False): """Return True if it's possible to optimize PDFs.""" if CFG_PATH_PDFOPT: return True elif verbose: print >> sys.stderr, "PDF linearization is not supported because the pdfopt executable is not available" return False def can_pdfx(verbose=False): """Return True if it's possible to generate PDF/Xs.""" if not CFG_PATH_PDFTOPS: if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because the pdftops executable is not available" return False if not CFG_PATH_GS: if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because the gs executable is not available" return False else: try: output = run_shell_command("%s --version" % CFG_PATH_GS)[1].strip() if not output: raise ValueError("No version information returned") if [int(number) for number in output.split('.')] < [int(number) for number in CFG_GS_MINIMAL_VERSION_FOR_PDFX.split('.')]: print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because the minimal gs version for the executable %s is not met: it should be %s but %s has been found" % (CFG_PATH_GS, CFG_GS_MINIMAL_VERSION_FOR_PDFX, output) return False except Exception, err: print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because it's not possible to retrieve the gs version using the executable %s: %s" % (CFG_PATH_GS, err) return False if not CFG_PATH_PDFINFO: if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because the pdfinfo executable is not 
available" return False if not os.path.exists(CFG_ICC_PATH): if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/X is not possible because %s does not exists. Have you run make install-pdfa-helper-files?" % CFG_ICC_PATH return False return True def can_pdfa(verbose=False): """Return True if it's possible to generate PDF/As.""" if not CFG_PATH_PDFTOPS: if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the pdftops executable is not available" return False if not CFG_PATH_GS: if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the gs executable is not available" return False else: try: output = run_shell_command("%s --version" % CFG_PATH_GS)[1].strip() if not output: raise ValueError("No version information returned") if [int(number) for number in output.split('.')] < [int(number) for number in CFG_GS_MINIMAL_VERSION_FOR_PDFA.split('.')]: print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the minimal gs version for the executable %s is not met: it should be %s but %s has been found" % (CFG_PATH_GS, CFG_GS_MINIMAL_VERSION_FOR_PDFA, output) return False except Exception, err: print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because it's not possible to retrieve the gs version using the executable %s: %s" % (CFG_PATH_GS, err) return False if not CFG_PATH_PDFINFO: if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because the pdfinfo executable is not available" return False if not os.path.exists(CFG_ICC_PATH): if verbose: print >> sys.stderr, "Conversion of PS or PDF to PDF/A is not possible because %s does not exists. Have you run make install-pdfa-helper-files?" 
% CFG_ICC_PATH return False return True def can_perform_ocr(verbose=False): """Return True if it's possible to perform OCR.""" if not CFG_CAN_DO_OCR: if verbose: print >> sys.stderr, "OCR is not supported because either the pyPdf of ReportLab Python libraries are missing" return False if not CFG_PATH_OCROSCRIPT: if verbose: print >> sys.stderr, "OCR is not supported because the ocroscript executable is not available" return False if not CFG_PATH_PDFTOPPM: if verbose: print >> sys.stderr, "OCR is not supported because the pdftoppm executable is not available" return False return True def guess_ocropus_produced_garbage(input_file, hocr_p): """Return True if the output produced by OCROpus in hocr format contains only garbage instead of text. This is implemented via an heuristic: if the most common length for sentences encoded in UTF-8 is 1 then this is Garbage (tm). """ def _get_words_from_text(): ret = [] for row in open(input_file): for word in row.strip().split(' '): ret.append(word.strip()) return ret def _get_words_from_hocr(): ret = [] hocr = extract_hocr(open(input_file).read()) for dummy, dummy, lines in hocr: for dummy, line in lines: for word in line.split(): ret.append(word.strip()) return ret if hocr_p: words = _get_words_from_hocr() else: words = _get_words_from_text() #stats = {} #most_common_len = 0 #most_common_how_many = 0 #for word in words: #if word: #word_length = len(word.decode('utf-8')) #stats[word_length] = stats.get(word_length, 0) + 1 #if stats[word_length] > most_common_how_many: #most_common_len = word_length #most_common_how_many = stats[word_length] goods = 0 bads = 0 for word in words: for char in word.decode('utf-8'): if (u'a' <= char <= u'z') or (u'A' <= char <= u'Z'): goods += 1 else: bads += 1 if bads > goods: get_file_converter_logger().debug('OCROpus produced garbage') return True else: return False def guess_is_OCR_needed(input_file, ln='en'): """ Tries to see if enough text is retrievable from input_file. 
Return True if OCR is needed, False if it's already possible to retrieve information from the document. """ ## FIXME: a way to understand if pdftotext has returned garbage ## shuould be found. E.g. 1.0*len(text)/len(zlib.compress(text)) < 2.1 ## could be a good hint for garbage being found. return True def convert_file(input_file, output_file=None, output_format=None, **params): """ Convert files from one format to another. @param input_file [string] the path to an existing file @param output_file [string] the path to the desired ouput. (if None a temporary file is generated) @param output_format [string] the desired format (if None it is taken from output_file) @param params other paramaters to pass to the particular converter @return [string] the final output_file """ from invenio.bibdocfile import decompose_file, normalize_format if output_format is None: if output_file is None: raise ValueError("At least output_file or format should be specified.") else: output_ext = decompose_file(output_file, skip_version=True)[2] else: output_ext = normalize_format(output_format) input_ext = decompose_file(input_file, skip_version=True)[2] conversion_chain = can_convert(input_ext, output_ext) if conversion_chain: get_file_converter_logger().debug("Conversion chain from %s to %s: %s" % (input_ext, output_ext, conversion_chain)) current_input = input_file for i, (converter, final_params) in enumerate(conversion_chain): current_output = None if i == (len(conversion_chain) - 1): current_output = output_file final_params = dict(final_params) final_params.update(params) try: get_file_converter_logger().debug("Converting from %s to %s using %s with params %s" % (current_input, current_output, converter, final_params)) current_output = converter(current_input, current_output, **final_params) get_file_converter_logger().debug("... 
current_output %s" % (current_output, )) except InvenioWebSubmitFileConverterError, err: raise InvenioWebSubmitFileConverterError("Error when converting from %s to %s: %s" % (input_file, output_ext, err)) except Exception, err: register_exception(alert_admin=True) raise InvenioWebSubmitFileConverterError("Unexpected error when converting from %s to %s (%s): %s" % (input_file, output_ext, type(err), err)) if current_input != input_file: os.remove(current_input) current_input = current_output return current_output else: raise InvenioWebSubmitFileConverterError("It's impossible to convert from %s to %s" % (input_ext, output_ext)) try: _UNOCONV_DAEMON except NameError: _UNOCONV_DAEMON = None _UNOCONV_DAEMON_LOCK = threading.Lock() def _register_unoconv(): global _UNOCONV_DAEMON if CFG_OPENOFFICE_SERVER_HOST != 'localhost': return _UNOCONV_DAEMON_LOCK.acquire() try: if not _UNOCONV_DAEMON: output_log = open(CFG_UNOCONV_LOG_PATH, 'a') _UNOCONV_DAEMON = subprocess.Popen(['sudo', '-S', '-u', CFG_OPENOFFICE_USER, os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '-l'], stdin=open('/dev/null', 'r'), stdout=output_log, stderr=output_log) time.sleep(3) finally: _UNOCONV_DAEMON_LOCK.release() def _unregister_unoconv(): global _UNOCONV_DAEMON if CFG_OPENOFFICE_SERVER_HOST != 'localhost': return _UNOCONV_DAEMON_LOCK.acquire() try: if _UNOCONV_DAEMON: output_log = open(CFG_UNOCONV_LOG_PATH, 'a') subprocess.call(['sudo', '-S', '-u', CFG_OPENOFFICE_USER, os.path.join(CFG_BINDIR, 'inveniounoconv'), '-k', '-vvv'], stdin=open('/dev/null', 'r'), stdout=output_log, stderr=output_log) time.sleep(1) if _UNOCONV_DAEMON.poll(): try: os.kill(_UNOCONV_DAEMON.pid, signal.SIGTERM) except OSError: pass if _UNOCONV_DAEMON.poll(): try: os.kill(_UNOCONV_DAEMON.pid, signal.SIGKILL) except OSError: pass finally: _UNOCONV_DAEMON_LOCK.release() ## NOTE: in case we switch back keeping LibreOffice running, uncomment ## the 
following line. #atexit.register(_unregister_unoconv) def unoconv(input_file, output_file=None, output_format='txt', pdfopt=True, **dummy): """Use unconv to convert among OpenOffice understood documents.""" from invenio.bibdocfile import normalize_format ## NOTE: in case we switch back keeping LibreOffice running, uncomment ## the following line. #_register_unoconv() input_file, output_file, dummy = prepare_io(input_file, output_file, output_format, need_working_dir=False) if output_format == 'txt': unoconv_format = 'text' else: unoconv_format = output_format try: try: ## We copy the input file and we make it available to OpenOffice ## with the user nobody from invenio.bibdocfile import decompose_file input_format = decompose_file(input_file, skip_version=True)[2] fd, tmpinputfile = tempfile.mkstemp(dir=CFG_TMPDIR, suffix=normalize_format(input_format)) os.close(fd) shutil.copy(input_file, tmpinputfile) get_file_converter_logger().debug("Prepared input file %s" % tmpinputfile) os.chmod(tmpinputfile, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) tmpoutputfile = tempfile.mktemp(dir=CFG_OPENOFFICE_TMPDIR, suffix=normalize_format(output_format)) get_file_converter_logger().debug("Prepared output file %s" % tmpoutputfile) try: execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '--output', tmpoutputfile, '-f', unoconv_format, tmpinputfile, sudo=CFG_OPENOFFICE_USER) except: register_exception(alert_admin=True) raise except InvenioWebSubmitFileConverterError: ## Ok maybe OpenOffice hanged. Let's better kill it and restarted! if CFG_OPENOFFICE_SERVER_HOST != 'localhost': ## There's not that much that we can do. Let's bail out if not os.path.exists(tmpoutputfile) or not os.path.getsize(tmpoutputfile): raise else: ## Sometimes OpenOffice crashes but we don't care :-) ## it still have created a nice file. 
pass else: execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-k', sudo=CFG_OPENOFFICE_USER) ## NOTE: in case we switch back keeping LibreOffice running, uncomment ## the following lines. #_unregister_unoconv() #_register_unoconv() time.sleep(5) try: execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-s', CFG_OPENOFFICE_SERVER_HOST, '-p', str(CFG_OPENOFFICE_SERVER_PORT), '--output', tmpoutputfile, '-f', unoconv_format, tmpinputfile, sudo=CFG_OPENOFFICE_USER) except InvenioWebSubmitFileConverterError: execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-vvv', '-k', sudo=CFG_OPENOFFICE_USER) if not os.path.exists(tmpoutputfile) or not os.path.getsize(tmpoutputfile): raise InvenioWebSubmitFileConverterError('No output was generated by OpenOffice') else: ## Sometimes OpenOffice crashes but we don't care :-) ## it still have created a nice file. pass except Exception, err: raise InvenioWebSubmitFileConverterError(get_unoconv_installation_guideline(err)) output_format = normalize_format(output_format) if output_format == '.pdf' and pdfopt: pdf2pdfopt(tmpoutputfile, output_file) else: shutil.copy(tmpoutputfile, output_file) execute_command(os.path.join(CFG_BINDIR, 'inveniounoconv'), '-r', tmpoutputfile, sudo=CFG_OPENOFFICE_USER) os.remove(tmpinputfile) return output_file def get_unoconv_installation_guideline(err): """Return the Libre/OpenOffice installation guideline (embedding the current error message). """ from invenio.bibtask import guess_apache_process_user return wrap_text_in_a_box("""\ OpenOffice.org can't properly create files in the OpenOffice.org temporary directory %(tmpdir)s, as the user %(nobody)s (as configured in CFG_OPENOFFICE_USER invenio(-local).conf variable): %(err)s. 
In your /etc/sudoers file, you should authorize the %(apache)s user to run %(unoconv)s as %(nobody)s user as in: %(apache)s ALL=(%(nobody)s) NOPASSWD: %(unoconv)s You should then run the following commands: $ sudo mkdir -p %(tmpdir)s $ sudo chown -R %(nobody)s %(tmpdir)s $ sudo chmod -R 755 %(tmpdir)s""" % { 'tmpdir' : CFG_OPENOFFICE_TMPDIR, 'nobody' : CFG_OPENOFFICE_USER, 'err' : err, 'apache' : CFG_BIBSCHED_PROCESS_USER or guess_apache_process_user(), 'python' : CFG_PATH_OPENOFFICE_PYTHON, 'unoconv' : os.path.join(CFG_BINDIR, 'inveniounoconv') }) def can_unoconv(verbose=False): """ If OpenOffice.org integration is enabled, checks whether the system is properly configured. """ if CFG_PATH_OPENOFFICE_PYTHON and CFG_OPENOFFICE_SERVER_HOST: try: test = os.path.join(CFG_TMPDIR, 'test.txt') open(test, 'w').write('test') output = unoconv(test, output_format='pdf') output2 = convert_file(output, output_format='.txt') if 'test' not in open(output2).read(): raise Exception("Coulnd't produce a valid PDF with Libre/OpenOffice.org") os.remove(output2) os.remove(output) os.remove(test) return True except Exception, err: if verbose: print >> sys.stderr, get_unoconv_installation_guideline(err) return False else: if verbose: print >> sys.stderr, "Libre/OpenOffice.org integration not enabled" return False def any2djvu(input_file, output_file=None, resolution=400, ocr=True, input_format=5, **dummy): """ Transform input_file into a .djvu file. 
@param input_file [string] the input file name @param output_file [string] the output_file file name, None for temporary generated @param resolution [int] the resolution of the output_file @param input_format [int] [1-9]: 1 - DjVu Document (for verification or OCR) 2 - PS/PS.GZ/PDF Document (default) 3 - Photo/Picture/Icon 4 - Scanned Document - B&W - <200 dpi 5 - Scanned Document - B&W - 200-400 dpi 6 - Scanned Document - B&W - >400 dpi 7 - Scanned Document - Color/Mixed - <200 dpi 8 - Scanned Document - Color/Mixed - 200-400 dpi 9 - Scanned Document - Color/Mixed - >400 dpi @return [string] output_file input_file. raise InvenioWebSubmitFileConverterError in case of errors. Note: due to the bottleneck of using a centralized server, it is very slow and is not suitable for interactive usage (e.g. WebSubmit functions) """ from invenio.bibdocfile import decompose_file input_file, output_file, working_dir = prepare_io(input_file, output_file, '.djvu') ocr = ocr and "1" or "0" ## Any2djvu expect to find the file in the current directory. execute_command(CFG_PATH_ANY2DJVU, '-a', '-c', '-r', resolution, '-o', ocr, '-f', input_format, os.path.basename(input_file), cwd=working_dir) ## Any2djvu doesn't let you choose the output_file file name. djvu_output = os.path.join(working_dir, decompose_file(input_file)[1] + '.djvu') shutil.move(djvu_output, output_file) clean_working_dir(working_dir) return output_file _RE_FIND_TITLE = re.compile(r'^Title:\s*(.*?)\s*$') def pdf2pdfx(input_file, output_file=None, title=None, pdfopt=False, profile="pdf/x-3:2002", **dummy): """ Transform any PDF into a PDF/X (see: ) @param input_file [string] the input file name @param output_file [string] the output_file file name, None for temporary generated @param title [string] the title of the document. None for autodiscovery. @param pdfopt [bool] whether to linearize the pdf, too. @param profile: [string] the PDFX profile to use. 
Supports: 'pdf/x-1a:2001', 'pdf/x-1a:2003', 'pdf/x-3:2002' @return [string] output_file input_file raise InvenioWebSubmitFileConverterError in case of errors. """ input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf') if title is None: stdout = execute_command(CFG_PATH_PDFINFO, input_file) for line in stdout.split('\n'): g = _RE_FIND_TITLE.match(line) if g: title = g.group(1) break if not title: title = 'No title' get_file_converter_logger().debug("Extracted title is %s" % title) if os.path.exists(CFG_ICC_PATH): shutil.copy(CFG_ICC_PATH, working_dir) else: raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?') pdfx_header = open(CFG_PDFX_DEF_PATH).read() pdfx_header = pdfx_header.replace('<<<>>>', title) icc_iso_profile_def = '' if profile == 'pdf/x-1a:2001': pdfx_version = 'PDF/X-1a:2001' pdfx_conformance = 'PDF/X-1a:2001' elif profile == 'pdf/x-1a:2003': pdfx_version = 'PDF/X-1a:2003' pdfx_conformance = 'PDF/X-1a:2003' elif profile == 'pdf/x-3:2002': icc_iso_profile_def = '/ICCProfile (ISOCoatedsb.icc)' pdfx_version = 'PDF/X-3:2002' pdfx_conformance = 'PDF/X-3:2002' pdfx_header = pdfx_header.replace('<<<>>>', icc_iso_profile_def) pdfx_header = pdfx_header.replace('<<<>>>', pdfx_version) pdfx_header = pdfx_header.replace('<<<>>>', pdfx_conformance) outputpdf = os.path.join(working_dir, 'output_file.pdf') open(os.path.join(working_dir, 'PDFX_def.ps'), 'w').write(pdfx_header) if profile in ['pdf/x-3:2002']: execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), input_file, cwd=working_dir) elif profile in ['pdf/x-1a:2001', 'pdf/x-1a:2003']: execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', 
'-dNOOUTERSAVE', '-sColorConversionStrategy=CMYK', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), input_file, cwd=working_dir) if pdfopt: execute_command(CFG_PATH_PDFOPT, outputpdf, output_file) else: shutil.move(outputpdf, output_file) clean_working_dir(working_dir) return output_file def pdf2pdfa(input_file, output_file=None, title=None, pdfopt=True, **dummy): """ Transform any PDF into a PDF/A (see: ) @param input_file [string] the input file name @param output_file [string] the output_file file name, None for temporary generated @param title [string] the title of the document. None for autodiscovery. @param pdfopt [bool] whether to linearize the pdf, too. @return [string] output_file input_file raise InvenioWebSubmitFileConverterError in case of errors. """ input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf') if title is None: stdout = execute_command(CFG_PATH_PDFINFO, input_file) for line in stdout.split('\n'): g = _RE_FIND_TITLE.match(line) if g: title = g.group(1) break if not title: title = 'No title' get_file_converter_logger().debug("Extracted title is %s" % title) if os.path.exists(CFG_ICC_PATH): shutil.copy(CFG_ICC_PATH, working_dir) else: raise InvenioWebSubmitFileConverterError('ERROR: ISOCoatedsb.icc file missing. 
Have you run "make install-pdfa-helper-files" as part of your Invenio deployment?') pdfa_header = open(CFG_PDFA_DEF_PATH).read() pdfa_header = pdfa_header.replace('<<<>>>', title) inputps = os.path.join(working_dir, 'input.ps') outputpdf = os.path.join(working_dir, 'output_file.pdf') open(os.path.join(working_dir, 'PDFA_def.ps'), 'w').write(pdfa_header) execute_command(CFG_PATH_PDFTOPS, '-level3', input_file, inputps) execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFA', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFA_def.ps'), 'input.ps', cwd=working_dir) if pdfopt: execute_command(CFG_PATH_PDFOPT, outputpdf, output_file) else: shutil.move(outputpdf, output_file) clean_working_dir(working_dir) return output_file def pdf2pdfopt(input_file, output_file=None, **dummy): """ Linearize the input PDF in order to improve the web-experience when visualizing the document through the web. @param input_file [string] the input input_file @param output_file [string] the output_file file name, None for temporary generated @return [string] output_file input_file raise InvenioWebSubmitFileConverterError in case of errors. """ input_file, output_file, dummy = prepare_io(input_file, output_file, '.pdf', need_working_dir=False) execute_command(CFG_PATH_PDFOPT, input_file, output_file) return output_file def pdf2ps(input_file, output_file=None, level=2, compress=True, **dummy): """ Convert from Pdf to Postscript. 
""" if compress: suffix = '.ps.gz' else: suffix = '.ps' input_file, output_file, working_dir = prepare_io(input_file, output_file, suffix) execute_command(CFG_PATH_PDFTOPS, '-level%i' % level, input_file, os.path.join(working_dir, 'output.ps')) if compress: execute_command(CFG_PATH_GZIP, '-c', os.path.join(working_dir, 'output.ps'), filename_out=output_file) else: shutil.move(os.path.join(working_dir, 'output.ps'), output_file) clean_working_dir(working_dir) return output_file def ps2pdfx(input_file, output_file=None, title=None, pdfopt=False, profile="pdf/x-3:2002", **dummy): """ Transform any PS into a PDF/X (see: ) @param input_file [string] the input file name @param output_file [string] the output_file file name, None for temporary generated @param title [string] the title of the document. None for autodiscovery. @param pdfopt [bool] whether to linearize the pdf, too. @param profile: [string] the PDFX profile to use. Supports: 'pdf/x-1a:2001', 'pdf/x-1a:2003', 'pdf/x-3:2002' @return [string] output_file input_file raise InvenioWebSubmitFileConverterError in case of errors. 
""" input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf') if input_file.endswith('.gz'): new_input_file = os.path.join(working_dir, 'input.ps') execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=new_input_file) input_file = new_input_file if not title: title = 'No title' shutil.copy(CFG_ICC_PATH, working_dir) pdfx_header = open(CFG_PDFX_DEF_PATH).read() pdfx_header = pdfx_header.replace('<<<>>>', title) icc_iso_profile_def = '' if profile == 'pdf/x-1a:2001': pdfx_version = 'PDF/X-1a:2001' pdfx_conformance = 'PDF/X-1a:2001' elif profile == 'pdf/x-1a:2003': pdfx_version = 'PDF/X-1a:2003' pdfx_conformance = 'PDF/X-1a:2003' elif profile == 'pdf/x-3:2002': icc_iso_profile_def = '/ICCProfile (ISOCoatedsb.icc)' pdfx_version = 'PDF/X-3:2002' pdfx_conformance = 'PDF/X-3:2002' pdfx_header = pdfx_header.replace('<<<>>>', icc_iso_profile_def) pdfx_header = pdfx_header.replace('<<<>>>', pdfx_version) pdfx_header = pdfx_header.replace('<<<>>>', title) outputpdf = os.path.join(working_dir, 'output_file.pdf') open(os.path.join(working_dir, 'PDFX_def.ps'), 'w').write(pdfx_header) if profile in ['pdf/x-3:2002']: execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), 'input.ps', cwd=working_dir) elif profile in ['pdf/x-1a:2001', 'pdf/x-1a:2003']: execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFX', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-sColorConversionStrategy=CMYK', '-dAutoRotatePages=/None', '-sDEVICE=pdfwrite', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFX_def.ps'), 'input.ps', cwd=working_dir) if pdfopt: execute_command(CFG_PATH_PDFOPT, outputpdf, output_file) else: shutil.move(outputpdf, output_file) clean_working_dir(working_dir) return output_file def ps2pdfa(input_file, output_file=None, 
            title=None, pdfopt=True, **dummy):
    """
    Transform any PS into a PDF/A (the ISO-standardized archival PDF profile).

    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param title [string] the title of the document. None for autodiscovery.
    @param pdfopt [bool] whether to linearize the pdf, too.
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
    """
    input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf')
    if input_file.endswith('.gz'):
        ## Uncompress a gzipped PostScript input into the working directory.
        new_input_file = os.path.join(working_dir, 'input.ps')
        execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=new_input_file)
        input_file = new_input_file
    if not title:
        title = 'No title'
    ## The generated PDFA_def.ps references the ICC profile by relative name,
    ## so the profile has to sit next to it inside the working directory.
    shutil.copy(CFG_ICC_PATH, working_dir)
    pdfa_header = open(CFG_PDFA_DEF_PATH).read()
    ## Substitute the document title into the ghostscript PDF/A definition.
    pdfa_header = pdfa_header.replace('<<<>>>', title)
    outputpdf = os.path.join(working_dir, 'output_file.pdf')
    open(os.path.join(working_dir, 'PDFA_def.ps'), 'w').write(pdfa_header)
    ## Run ghostscript with the PDF/A switches; -sOutputFile is relative, so
    ## the command executes with cwd set to the working directory.
    execute_command(CFG_PATH_GS, '-sProcessColorModel=DeviceCMYK', '-dPDFA', '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dUseCIEColor', '-sDEVICE=pdfwrite', '-dAutoRotatePages=/None', '-sOutputFile=output_file.pdf', os.path.join(working_dir, 'PDFA_def.ps'), input_file, cwd=working_dir)
    if pdfopt:
        ## Linearize ("fast web view") the generated PDF.
        execute_command(CFG_PATH_PDFOPT, outputpdf, output_file)
    else:
        shutil.move(outputpdf, output_file)
    clean_working_dir(working_dir)
    return output_file


def ps2pdf(input_file, output_file=None, pdfopt=True, **dummy):
    """
    Transform any PS into a PDF

    @param input_file [string] the input file name
    @param output_file [string] the output_file file name, None for temporary generated
    @param pdfopt [bool] whether to linearize the pdf, too.
    @return [string] output_file input_file
    raise InvenioWebSubmitFileConverterError in case of errors.
""" input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf') if input_file.endswith('.gz'): new_input_file = os.path.join(working_dir, 'input.ps') execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=new_input_file) input_file = new_input_file outputpdf = os.path.join(working_dir, 'output_file.pdf') execute_command(CFG_PATH_PS2PDF, input_file, outputpdf, cwd=working_dir) if pdfopt: execute_command(CFG_PATH_PDFOPT, outputpdf, output_file) else: shutil.move(outputpdf, output_file) clean_working_dir(working_dir) return output_file def pdf2pdfhocr(input_pdf, text_hocr, output_pdf, rotations=None, font='Courier', draft=False): """ Adds the OCRed text to the original pdf. @param rotations: a list of angles by which pages should be rotated """ def _get_page_rotation(i): if len(rotations) > i: return rotations[i] return 0 if rotations is None: rotations = [] input_pdf, hocr_pdf, dummy = prepare_io(input_pdf, output_ext='.pdf', need_working_dir=False) create_pdf(extract_hocr(open(text_hocr).read()), hocr_pdf, font, draft) input1 = PdfFileReader(file(input_pdf, "rb")) input2 = PdfFileReader(file(hocr_pdf, "rb")) output = PdfFileWriter() info = input1.getDocumentInfo() if info: infoDict = output._info.getObject() infoDict.update(info) for i in range(0, input1.getNumPages()): orig_page = input1.getPage(i) text_page = input2.getPage(i) angle = _get_page_rotation(i) if angle != 0: print >> sys.stderr, "Rotating page %d by %d degrees." % (i, angle) text_page = text_page.rotateClockwise(angle) if draft: below, above = orig_page, text_page else: below, above = text_page, orig_page below.mergePage(above) if angle != 0 and not draft: print >> sys.stderr, "Rotating back page %d by %d degrees." 
% (i, angle) below.rotateCounterClockwise(angle) output.addPage(below) outputStream = file(output_pdf, "wb") output.write(outputStream) outputStream.close() os.remove(hocr_pdf) return output_pdf def pdf2hocr2pdf(input_file, output_file=None, ln='en', return_working_dir=False, extract_only_text=False, pdfopt=True, font='Courier', draft=False, **dummy): """ Return the text content in input_file. @param ln is a two letter language code to give the OCR tool a hint. @param return_working_dir if set to True, will return output_file path and the working_dir path, instead of deleting the working_dir. This is useful in case you need the intermediate images to build again a PDF. """ def _perform_rotate(working_dir, imagefile, angle): """Rotate imagefile of the corresponding angle. Creates a new file with rotated.ppm.""" get_file_converter_logger().debug('Performing rotate on %s by %s degrees' % (imagefile, angle)) if not angle: #execute_command('%s %s %s', CFG_PATH_CONVERT, os.path.join(working_dir, imagefile), os.path.join(working_dir, 'rotated-%s' % imagefile)) shutil.copy(os.path.join(working_dir, imagefile), os.path.join(working_dir, 'rotated.ppm')) else: execute_command(CFG_PATH_CONVERT, os.path.join(working_dir, imagefile), '-rotate', str(angle), '-depth', str(8), os.path.join(working_dir, 'rotated.ppm')) return True def _perform_deskew(working_dir): """Perform ocroscript deskew. Expect to work on rotated-imagefile. Creates deskewed.ppm. 
Return True if deskewing was fine.""" get_file_converter_logger().debug('Performing deskew') try: dummy, stderr = execute_command_with_stderr(CFG_PATH_OCROSCRIPT, os.path.join(CFG_ETCDIR, 'websubmit', 'file_converter_templates', 'deskew.lua'), os.path.join(working_dir, 'rotated.ppm'), os.path.join(working_dir, 'deskewed.ppm')) if stderr.strip(): get_file_converter_logger().debug('Errors found during deskewing') return False else: return True except InvenioWebSubmitFileConverterError, err: get_file_converter_logger().debug('Deskewing error: %s' % err) return False def _perform_recognize(working_dir): """Perform ocroscript recognize. Expect to work on deskewed.ppm. Creates recognized.out Return True if recognizing was fine.""" get_file_converter_logger().debug('Performing recognize') if extract_only_text: output_mode = 'text' else: output_mode = 'hocr' try: dummy, stderr = execute_command_with_stderr(CFG_PATH_OCROSCRIPT, 'recognize', '--tesslanguage=%s' % ln, '--output-mode=%s' % output_mode, os.path.join(working_dir, 'deskewed.ppm'), filename_out=os.path.join(working_dir, 'recognize.out')) if stderr.strip(): ## There was some output on stderr get_file_converter_logger().debug('Errors found in recognize.err') return False return not guess_ocropus_produced_garbage(os.path.join(working_dir, 'recognize.out'), not extract_only_text) except InvenioWebSubmitFileConverterError, err: get_file_converter_logger().debug('Recognizer error: %s' % err) return False def _perform_dummy_recognize(working_dir): """Return an empty text or an empty hocr referencing the image.""" get_file_converter_logger().debug('Performing dummy recognize') if extract_only_text: out = '' else: out = """ OCR Output
    """ open(os.path.join(working_dir, 'recognize.out'), 'w').write(out) def _find_image_file(working_dir, imageprefix, page): ret = '%s-%d.ppm' % (imageprefix, page) if os.path.exists(os.path.join(working_dir, ret)): return ret ret = '%s-%02d.ppm' % (imageprefix, page) if os.path.exists(os.path.join(working_dir, ret)): return ret ret = '%s-%03d.ppm' % (imageprefix, page) if os.path.exists(os.path.join(working_dir, ret)): return ret ret = '%s-%04d.ppm' % (imageprefix, page) if os.path.exists(os.path.join(working_dir, ret)): return ret ret = '%s-%05d.ppm' % (imageprefix, page) if os.path.exists(os.path.join(working_dir, ret)): return ret ret = '%s-%06d.ppm' % (imageprefix, page) if os.path.exists(os.path.join(working_dir, ret)): return ret ## I guess we won't have documents with more than million pages return None def _ocr(tmp_output_file): """ Append to tmp_output_file the partial results of OCROpus recognize. Return a list of rotations. """ page = 0 rotations = [] while True: page += 1 get_file_converter_logger().debug('Page %d.' % page) execute_command(CFG_PATH_PDFTOPPM, '-f', str(page), '-l', str(page), '-r', str(CFG_PPM_RESOLUTION), '-aa', 'yes', '-freetype', 'yes', input_file, os.path.join(working_dir, 'image')) imagefile = _find_image_file(working_dir, 'image', page) if imagefile == None: break for angle in (0, 180, 90, 270): get_file_converter_logger().debug('Trying %d degrees...' 
% angle) if _perform_rotate(working_dir, imagefile, angle) and _perform_deskew(working_dir) and _perform_recognize(working_dir): rotations.append(angle) break else: get_file_converter_logger().debug('Dummy recognize') rotations.append(0) _perform_dummy_recognize(working_dir) open(tmp_output_file, 'a').write(open(os.path.join(working_dir, 'recognize.out')).read()) # clean os.remove(os.path.join(working_dir, imagefile)) return rotations if CFG_PATH_OCROSCRIPT: if len(ln) == 2: ln = CFG_TWO2THREE_LANG_CODES.get(ln, 'eng') if extract_only_text: input_file, output_file, working_dir = prepare_io(input_file, output_file, output_ext='.txt') _ocr(output_file) else: input_file, tmp_output_hocr, working_dir = prepare_io(input_file, output_ext='.hocr') rotations = _ocr(tmp_output_hocr) if pdfopt: input_file, tmp_output_pdf, dummy = prepare_io(input_file, output_ext='.pdf', need_working_dir=False) tmp_output_pdf, output_file, dummy = prepare_io(tmp_output_pdf, output_file, output_ext='.pdf', need_working_dir=False) pdf2pdfhocr(input_file, tmp_output_hocr, tmp_output_pdf, rotations=rotations, font=font, draft=draft) pdf2pdfopt(tmp_output_pdf, output_file) os.remove(tmp_output_pdf) else: input_file, output_file, dummy = prepare_io(input_file, output_file, output_ext='.pdf', need_working_dir=False) pdf2pdfhocr(input_file, tmp_output_hocr, output_file, rotations=rotations, font=font, draft=draft) clean_working_dir(working_dir) return output_file else: raise InvenioWebSubmitFileConverterError("It's impossible to generate HOCR output from PDF. OCROpus is not available.") def pdf2text(input_file, output_file=None, perform_ocr=True, ln='en', **dummy): """ Return the text content in input_file. 
""" input_file, output_file, dummy = prepare_io(input_file, output_file, '.txt', need_working_dir=False) execute_command(CFG_PATH_PDFTOTEXT, '-enc', 'UTF-8', '-eol', 'unix', '-nopgbrk', input_file, output_file) if perform_ocr and can_perform_ocr(): ocred_output = pdf2hocr2pdf(input_file, ln=ln, extract_only_text=True) try: output = open(output_file, 'a') for row in open(ocred_output): output.write(row) output.close() finally: silent_remove(ocred_output) return output_file def txt2text(input_file, output_file=None, **dummy): """ Return the text content in input_file """ input_file, output_file, dummy = prepare_io(input_file, output_file, '.txt', need_working_dir=False) shutil.copy(input_file, output_file) return output_file def html2text(input_file, output_file=None, **dummy): """ Return the text content of an HTML/XML file. """ class HTMLStripper(HTMLParser.HTMLParser): def __init__(self, output_file): HTMLParser.HTMLParser.__init__(self) self.output_file = output_file def handle_entityref(self, name): if name in entitydefs: self.output_file.write(entitydefs[name].decode('latin1').encode('utf8')) def handle_data(self, data): if data.strip(): self.output_file.write(_RE_CLEAN_SPACES.sub(' ', data)) def handle_charref(self, data): try: self.output_file.write(unichr(int(data)).encode('utf8')) except: pass def close(self): self.output_file.close() HTMLParser.HTMLParser.close(self) input_file, output_file, dummy = prepare_io(input_file, output_file, '.txt', need_working_dir=False) html_stripper = HTMLStripper(open(output_file, 'w')) for line in open(input_file): html_stripper.feed(line) html_stripper.close() return output_file def djvu2text(input_file, output_file=None, **dummy): """ Return the text content in input_file. 
""" input_file, output_file, dummy = prepare_io(input_file, output_file, '.txt', need_working_dir=False) execute_command(CFG_PATH_DJVUTXT, input_file, output_file) return output_file def djvu2ps(input_file, output_file=None, level=2, compress=True, **dummy): """ Convert a djvu into a .ps[.gz] """ if compress: input_file, output_file, working_dir = prepare_io(input_file, output_file, output_ext='.ps.gz') try: execute_command(CFG_PATH_DJVUPS, input_file, os.path.join(working_dir, 'output.ps')) execute_command(CFG_PATH_GZIP, '-c', os.path.join(working_dir, 'output.ps'), filename_out=output_file) finally: clean_working_dir(working_dir) else: try: input_file, output_file, working_dir = prepare_io(input_file, output_file, output_ext='.ps') execute_command(CFG_PATH_DJVUPS, '-level=%i' % level, input_file, output_file) finally: clean_working_dir(working_dir) return output_file def tiff2pdf(input_file, output_file=None, pdfopt=True, pdfa=True, perform_ocr=True, **args): """ Convert a .tiff into a .pdf """ if pdfa or pdfopt or perform_ocr: input_file, output_file, working_dir = prepare_io(input_file, output_file, '.pdf') try: partial_output = os.path.join(working_dir, 'output.pdf') execute_command(CFG_PATH_TIFF2PDF, '-o', partial_output, input_file) if perform_ocr: pdf2hocr2pdf(partial_output, output_file, pdfopt=pdfopt, **args) elif pdfa: pdf2pdfa(partial_output, output_file, pdfopt=pdfopt, **args) else: pdfopt(partial_output, output_file) finally: clean_working_dir(working_dir) else: input_file, output_file, dummy = prepare_io(input_file, output_file, '.pdf', need_working_dir=False) execute_command(CFG_PATH_TIFF2PDF, '-o', output_file, input_file) return output_file def pstotext(input_file, output_file=None, **dummy): """ Convert a .ps[.gz] into text. 
""" input_file, output_file, working_dir = prepare_io(input_file, output_file, '.txt') try: if input_file.endswith('.gz'): new_input_file = os.path.join(working_dir, 'input.ps') execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=new_input_file) input_file = new_input_file execute_command(CFG_PATH_PSTOTEXT, '-output', output_file, input_file) finally: clean_working_dir(working_dir) return output_file def gzip(input_file, output_file=None, **dummy): """ Compress a file. """ input_file, output_file, dummy = prepare_io(input_file, output_file, '.gz', need_working_dir=False) execute_command(CFG_PATH_GZIP, '-c', input_file, filename_out=output_file) return output_file def gunzip(input_file, output_file=None, **dummy): """ Uncompress a file. """ from invenio.bibdocfile import decompose_file input_ext = decompose_file(input_file, skip_version=True)[2] if input_ext.endswith('.gz'): input_ext = input_ext[:-len('.gz')] else: input_ext = None input_file, output_file, dummy = prepare_io(input_file, output_file, input_ext, need_working_dir=False) execute_command(CFG_PATH_GUNZIP, '-c', input_file, filename_out=output_file) return output_file def prepare_io(input_file, output_file=None, output_ext=None, need_working_dir=True): """Clean input_file and the output_file.""" from invenio.bibdocfile import decompose_file, normalize_format output_ext = normalize_format(output_ext) get_file_converter_logger().debug('Preparing IO for input=%s, output=%s, output_ext=%s' % (input_file, output_file, output_ext)) if output_ext is None: if output_file is None: output_ext = '.tmp' else: output_ext = decompose_file(output_file, skip_version=True)[2] if output_file is None: try: (fd, output_file) = tempfile.mkstemp(suffix=output_ext, dir=CFG_TMPDIR) os.close(fd) except IOError, err: raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary file: %s" % err) else: output_file = os.path.abspath(output_file) if os.path.exists(output_file): os.remove(output_file) if 
need_working_dir: try: working_dir = tempfile.mkdtemp(dir=CFG_TMPDIR, prefix='conversion') except IOError, err: raise InvenioWebSubmitFileConverterError("It's impossible to create a temporary directory: %s" % err) input_ext = decompose_file(input_file, skip_version=True)[2] new_input_file = os.path.join(working_dir, 'input' + input_ext) shutil.copy(input_file, new_input_file) input_file = new_input_file else: working_dir = None input_file = os.path.abspath(input_file) get_file_converter_logger().debug('IO prepared: input_file=%s, output_file=%s, working_dir=%s' % (input_file, output_file, working_dir)) return (input_file, output_file, working_dir) def clean_working_dir(working_dir): """ Remove the working_dir. """ get_file_converter_logger().debug('Cleaning working_dir: %s' % working_dir) shutil.rmtree(working_dir) def execute_command(*args, **argd): """Wrapper to run_process_with_timeout.""" get_file_converter_logger().debug("Executing: %s" % (args, )) args = [str(arg) for arg in args] res, stdout, stderr = run_process_with_timeout(args, cwd=argd.get('cwd'), filename_out=argd.get('filename_out'), filename_err=argd.get('filename_err'), sudo=argd.get('sudo')) get_file_converter_logger().debug('res: %s, stdout: %s, stderr: %s' % (res, stdout, stderr)) if res != 0: message = "ERROR: Error in running %s\n stdout:\n%s\nstderr:\n%s\n" % (args, stdout, stderr) get_file_converter_logger().error(message) raise InvenioWebSubmitFileConverterError(message) return stdout def execute_command_with_stderr(*args, **argd): """Wrapper to run_process_with_timeout.""" get_file_converter_logger().debug("Executing: %s" % (args, )) res, stdout, stderr = run_process_with_timeout(args, cwd=argd.get('cwd'), filename_out=argd.get('filename_out'), sudo=argd.get('sudo')) if res != 0: message = "ERROR: Error in running %s\n stdout:\n%s\nstderr:\n%s\n" % (args, stdout, stderr) get_file_converter_logger().error(message) raise InvenioWebSubmitFileConverterError(message) return stdout, stderr def 
silent_remove(path): """Remove without errors a path.""" if os.path.exists(path): try: os.remove(path) except OSError: pass __CONVERSION_MAP = get_conversion_map() def main_cli(): """ main function when the library behaves as a normal CLI tool. """ from invenio.bibdocfile import normalize_format parser = OptionParser() parser.add_option("-c", "--convert", dest="input_name", help="convert the specified FILE", metavar="FILE") parser.add_option("-d", "--debug", dest="debug", action="store_true", help="Enable debug information") parser.add_option("--special-pdf2hocr2pdf", dest="ocrize", help="convert the given scanned PDF into a PDF with OCRed text", metavar="FILE") parser.add_option("-f", "--format", dest="output_format", help="the desired output format", metavar="FORMAT") parser.add_option("-o", "--output", dest="output_name", help="the desired output FILE (if not specified a new file will be generated with the desired output format)") parser.add_option("--without-pdfa", action="store_false", dest="pdf_a", default=True, help="don't force creation of PDF/A PDFs") parser.add_option("--without-pdfopt", action="store_false", dest="pdfopt", default=True, help="don't force optimization of PDFs files") parser.add_option("--without-ocr", action="store_false", dest="ocr", default=True, help="don't force OCR") parser.add_option("--can-convert", dest="can_convert", help="display all the possible format that is possible to generate from the given format", metavar="FORMAT") parser.add_option("--is-ocr-needed", dest="check_ocr_is_needed", help="check if OCR is needed for the FILE specified", metavar="FILE") parser.add_option("-t", "--title", dest="title", help="specify the title (used when creating PDFs)", metavar="TITLE") parser.add_option("-l", "--language", dest="ln", help="specify the language (used when performing OCR, e.g. 
en, it, fr...)", metavar="LN", default='en') (options, dummy) = parser.parse_args() if options.debug: from logging import basicConfig basicConfig() get_file_converter_logger().setLevel(DEBUG) if options.can_convert: if options.can_convert: input_format = normalize_format(options.can_convert) if input_format == '.pdf': if can_pdfopt(True): print "PDF linearization supported" else: print "No PDF linearization support" if can_pdfa(True): print "PDF/A generation supported" else: print "No PDF/A generation support" if can_perform_ocr(True): print "OCR supported" else: print "OCR not supported" print 'Can convert from "%s" to:' % input_format[1:], for output_format in __CONVERSION_MAP: if can_convert(input_format, output_format): print '"%s"' % output_format[1:], print elif options.check_ocr_is_needed: print "Checking if OCR is needed on %s..." % options.check_ocr_is_needed, sys.stdout.flush() if guess_is_OCR_needed(options.check_ocr_is_needed): print "needed." else: print "not needed." elif options.ocrize: try: output = pdf2hocr2pdf(options.ocrize, output_file=options.output_name, title=options.title, ln=options.ln) print "Output stored in %s" % output except InvenioWebSubmitFileConverterError, err: print "ERROR: %s" % err sys.exit(1) else: try: if not options.output_name and not options.output_format: parser.error("Either --format, --output should be specified") if not options.input_name: parser.error("An input should be specified!") output = convert_file(options.input_name, output_file=options.output_name, output_format=options.output_format, pdfopt=options.pdfopt, pdfa=options.pdf_a, title=options.title, ln=options.ln) print "Output stored in %s" % output except InvenioWebSubmitFileConverterError, err: print "ERROR: %s" % err sys.exit(1) if __name__ == "__main__": main_cli() diff --git a/modules/websubmit/lib/websubmit_regression_tests.py b/modules/websubmit/lib/websubmit_regression_tests.py index bfeb3d667..c0c7f233a 100644 --- 
a/modules/websubmit/lib/websubmit_regression_tests.py +++ b/modules/websubmit/lib/websubmit_regression_tests.py @@ -1,270 +1,268 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSubmit Regression Test Suite.""" __revision__ = "$Id$" import unittest import os -import sys -from warnings import warn from logging import StreamHandler, DEBUG from cStringIO import StringIO from invenio.websubmit_file_converter import get_file_converter_logger from invenio.errorlib import register_exception from invenio.config import CFG_SITE_URL, CFG_PREFIX, CFG_TMPDIR from invenio.testutils import make_test_suite, run_test_suite, \ test_web_page_content, merge_error_messages from invenio import websubmit_file_stamper class WebSubmitWebPagesAvailabilityTest(unittest.TestCase): """Check WebSubmit web pages whether they are up or not.""" def test_submission_pages_availability(self): """websubmit - availability of submission pages""" baseurl = CFG_SITE_URL + '/submit/' _exports = ['', 'direct'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def 
test_publiline_pages_availability(self): """websubmit - availability of approval pages""" baseurl = CFG_SITE_URL _exports = ['/approve.py', '/publiline.py', '/yourapprovals.py'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_your_submissions_pages_availability(self): """websubmit - availability of Your Submissions pages""" baseurl = CFG_SITE_URL _exports = ['/yoursubmissions.py'] error_messages = [] for url in [baseurl + page for page in _exports]: error_messages.extend(test_web_page_content(url)) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_help_page_availability(self): """websubmit - availability of WebSubmit help page""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/submit-guide', expected_text="Submit Guide")) class WebSubmitLegacyURLsTest(unittest.TestCase): """ Check that the application still responds to legacy URLs""" def test_legacy_help_page_link(self): """websubmit - legacy Submit Guide page link""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/submit', expected_text="Submit Guide")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/submit/', expected_text="Submit Guide")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/submit/index.en.html', expected_text="Submit Guide")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/help/submit/access.en.html', expected_text="Submit Guide")) class WebSubmitXSSVulnerabilityTest(unittest.TestCase): """Test possible XSS vulnerabilities of the submission engine.""" def test_xss_in_submission_doctype(self): """websubmit - no XSS vulnerability in doctype parameter""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/submit?doctype=%3CSCRIPT%3Ealert%28%22XSS%22%29%3B%3C%2FSCRIPT%3E', expected_text='Unable to find document type: 
<SCRIPT>alert("XSS")', username="jekyll", password="j123ekyll")) def test_xss_in_submission_act(self): """websubmit - no XSS vulnerability in act parameter""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/submit?doctype=DEMOTHE&access=1_1&act=%3CSCRIPT%3Ealert%28%22XSS%22%29%3B%3C%2FSCRIPT%3E', expected_text='Invalid doctype and act parameters', username="jekyll", password="j123ekyll")) def test_xss_in_submission_page(self): """websubmit - no XSS vulnerability in access parameter""" self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/submit?doctype=DEMOTHE&access=/../../../etc/passwd&act=SBI&startPg=1&ln=en&ln=en', expected_text='Invalid parameters', username="jekyll", password="j123ekyll")) self.assertEqual([], test_web_page_content(CFG_SITE_URL + '/submit?doctype=DEMOTHE&access=%3CSCRIPT%3Ealert%28%22XSS%22%29%3B%3C%2FSCRIPT%3E&act=SBI', expected_text='Invalid parameters', username="jekyll", password="j123ekyll")) def WebSubmitFileConverterTestGenerator(): from invenio.websubmit_file_converter import get_conversion_map, can_convert if can_convert('.odt', '.txt'): ## Special test for unoconv/LibreOffice yield WebSubmitFileConverterTest(os.path.join(CFG_PREFIX, 'lib', 'webtest', 'invenio', 'test.odt'), '.odt', '.txt') if can_convert('.doc', '.txt'): ## Special test for unoconv/LibreOffice yield WebSubmitFileConverterTest(os.path.join(CFG_PREFIX, 'lib', 'webtest', 'invenio', 'test.doc'), '.doc', '.txt') for from_format in get_conversion_map().keys(): input_file = os.path.join(CFG_PREFIX, 'lib', 'webtest', 'invenio', 'test%s' % from_format) if not os.path.exists(input_file): ## Can't run such a test because there is no test example continue for to_format in get_conversion_map().keys(): if from_format == to_format: continue conversion_map = can_convert(from_format, to_format) if conversion_map: if [converter for converter in conversion_map if converter[0].__name__ == 'unoconv']: ## We don't want to test unoconv which is tested separately continue 
yield WebSubmitFileConverterTest(input_file, from_format, to_format) class WebSubmitFileConverterTest(unittest.TestCase): """Test WebSubmit file converter tool""" def __init__(self, input_file, from_format, to_format): super(WebSubmitFileConverterTest, self).__init__('runTest') self.from_format = from_format self.to_format = to_format self.input_file = input_file def setUp(self): logger = get_file_converter_logger() self.log = StringIO() logger.setLevel(DEBUG) for handler in logger.handlers: logger.removeHandler(handler) handler = StreamHandler(self.log) handler.setLevel(DEBUG) logger.addHandler(handler) def shortDescription(self): return """websubmit - test %s to %s conversion""" % (self.from_format, self.to_format) def runTest(self): from invenio.websubmit_file_converter import InvenioWebSubmitFileConverterError, convert_file try: tmpdir_snapshot1 = set(os.listdir(CFG_TMPDIR)) output_file = convert_file(self.input_file, output_format=self.to_format) tmpdir_snapshot2 = set(os.listdir(CFG_TMPDIR)) tmpdir_snapshot2.discard(os.path.basename(output_file)) if not os.path.exists(output_file): raise InvenioWebSubmitFileConverterError("output_file %s was not correctly created" % output_file) if tmpdir_snapshot2 - tmpdir_snapshot1: raise InvenioWebSubmitFileConverterError("Some temporary files were left over: %s" % (tmpdir_snapshot2 - tmpdir_snapshot1)) except Exception, err: register_exception(alert_admin=True) self.fail("ERROR: when converting from %s to %s: %s, the log contained: %s" % (self.from_format, self.to_format, err, self.log.getvalue())) class WebSubmitStampingTest(unittest.TestCase): """Test WebSubmit file stamping tool""" def test_stamp_coverpage(self): """websubmit - creation of a PDF cover page stamp (APIs)""" file_stamper_options = { 'latex-template' : "demo-stamp-left.tex", 'latex-template-var' : {'REPORTNUMBER':'TEST-2010','DATE':'10/10/2000'}, 'input-file' : CFG_PREFIX + "/lib/webtest/invenio/test.pdf", 'output-file' : "test-stamp-coverpage.pdf", 
'stamp' : "coverpage", 'layer' : "foreground", 'verbosity' : 0, } try: (stamped_file_path_only, stamped_file_name) = \ websubmit_file_stamper.stamp_file(file_stamper_options) except: self.fail("Stamping failed") # Test that file is now bigger... assert os.path.getsize(os.path.join(stamped_file_path_only, stamped_file_name)) > 12695 def test_stamp_firstpage(self): """websubmit - stamping first page of a PDF (APIs)""" file_stamper_options = { 'latex-template' : "demo-stamp-left.tex", 'latex-template-var' : {'REPORTNUMBER':'TEST-2010','DATE':'10/10/2000'}, 'input-file' : CFG_PREFIX + "/lib/webtest/invenio/test.pdf", 'output-file' : "test-stamp-firstpage.pdf", 'stamp' : "first", 'layer' : "background", 'verbosity' : 0, } try: (stamped_file_path_only, stamped_file_name) = \ websubmit_file_stamper.stamp_file(file_stamper_options) except: self.fail("Stamping failed") # Test that file is now bigger... assert os.path.getsize(os.path.join(stamped_file_path_only, stamped_file_name)) > 12695 def test_stamp_allpages(self): """websubmit - stamping all pages of a PDF (APIs)""" file_stamper_options = { 'latex-template' : "demo-stamp-left.tex", 'latex-template-var' : {'REPORTNUMBER':'TEST-2010','DATE':'10/10/2000'}, 'input-file' : CFG_PREFIX + "/lib/webtest/invenio/test.pdf", 'output-file' : "test-stamp-allpages.pdf", 'stamp' : "all", 'layer' : "foreground", 'verbosity' : 0, } try: (stamped_file_path_only, stamped_file_name) = \ websubmit_file_stamper.stamp_file(file_stamper_options) except: self.fail("Stamping failed") # Test that file is now bigger... 
assert os.path.getsize(os.path.join(stamped_file_path_only, stamped_file_name)) > 12695 TEST_SUITE = make_test_suite(WebSubmitWebPagesAvailabilityTest, WebSubmitLegacyURLsTest, WebSubmitXSSVulnerabilityTest, WebSubmitStampingTest) for test in WebSubmitFileConverterTestGenerator(): TEST_SUITE.addTest(test) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/websubmit/lib/websubmit_templates.py b/modules/websubmit/lib/websubmit_templates.py index 5b8f19666..8226cfb9d 100644 --- a/modules/websubmit/lib/websubmit_templates.py +++ b/modules/websubmit/lib/websubmit_templates.py @@ -1,3098 +1,2897 @@ ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
__revision__ = "$Id$" -import urllib import cgi import re import operator -from invenio.config import CFG_SITE_URL, \ - CFG_SITE_LANG, CFG_SITE_RECORD, CFG_INSPIRE_SITE +from invenio.config import CFG_SITE_URL, CFG_SITE_LANG, CFG_SITE_RECORD from invenio.messages import gettext_set_language -from invenio.dateutils import convert_datetext_to_dategui, convert_datestruct_to_dategui +from invenio.dateutils import convert_datetext_to_dategui from invenio.urlutils import create_html_link from invenio.webmessage_mailutils import email_quoted_txt2html from invenio.htmlutils import escape_html -from websubmit_config import \ - CFG_WEBSUBMIT_CHECK_USER_LEAVES_SUBMISSION +from invenio.websubmit_config import CFG_WEBSUBMIT_CHECK_USER_LEAVES_SUBMISSION class Template: # Parameters allowed in the web interface for fetching files files_default_urlargd = { 'version': (str, ""), # version "" means "latest" 'docname': (str, ""), # the docname (optional) 'format' : (str, ""), # the format 'verbose' : (int, 0), # the verbosity 'subformat': (str, ""), # the subformat 'download': (int, 0), # download as attachment } def tmpl_submit_home_page(self, ln, catalogues): """ The content of the home page of the submit engine Parameters: - 'ln' *string* - The language to display the interface in - 'catalogues' *string* - The HTML code for the catalogues list """ # load the right message language _ = gettext_set_language(ln) return """ """ % { 'document_types' : _("Document types available for submission"), 'please_select' : _("Please select the type of document you want to submit"), 'catalogues' : catalogues, 'ln' : ln, } def tmpl_submit_home_catalog_no_content(self, ln): """ The content of the home page of submit in case no doctypes are available Parameters: - 'ln' *string* - The language to display the interface in """ # load the right message language _ = gettext_set_language(ln) out = "

    " + _("No document types available.") + "

    \n" return out def tmpl_submit_home_catalogs(self, ln, catalogs): """ Produces the catalogs' list HTML code Parameters: - 'ln' *string* - The language to display the interface in - 'catalogs' *array* - The catalogs of documents, each one a hash with the properties: - 'id' - the internal id - 'name' - the name - 'sons' - sub-catalogs - 'docs' - the contained document types, in the form: - 'id' - the internal id - 'name' - the name There is at least one catalog """ # load the right message language _ = gettext_set_language(ln) # import pprint # out = "
    " + pprint.pformat(catalogs)
             out = ""
             for catalog in catalogs:
                 out += "\n
      " out += self.tmpl_submit_home_catalogs_sub(ln, catalog) out += "\n
    \n" return out def tmpl_print_warning(self, msg, type, prologue, epilogue): """Prints warning message and flushes output. Parameters: - 'msg' *string* - The message string - 'type' *string* - the warning type - 'prologue' *string* - HTML code to display before the warning - 'epilogue' *string* - HTML code to display after the warning """ out = '\n%s' % (prologue) if type: out += '%s: ' % type out += '%s%s' % (msg, epilogue) return out def tmpl_submit_home_catalogs_sub(self, ln, catalog): """ Recursive function that produces a catalog's HTML display Parameters: - 'ln' *string* - The language to display the interface in - 'catalog' *array* - A catalog of documents, with the properties: - 'id' - the internal id - 'name' - the name - 'sons' - sub-catalogs - 'docs' - the contained document types, in the form: - 'id' - the internal id - 'name' - the name """ # load the right message language _ = gettext_set_language(ln) if catalog['level'] == 1: out = "
  • %s\n" % catalog['name'] else: if catalog['level'] == 2: out = "
  • %s\n" % cgi.escape(catalog['name']) else: if catalog['level'] > 2: out = "
  • %s\n" % cgi.escape(catalog['name']) if len(catalog['docs']) or len(catalog['sons']): out += "
      \n" if len(catalog['docs']) != 0: for row in catalog['docs']: out += self.tmpl_submit_home_catalogs_doctype(ln, row) if len(catalog['sons']) != 0: for row in catalog['sons']: out += self.tmpl_submit_home_catalogs_sub(ln, row) if len(catalog['docs']) or len(catalog['sons']): out += "
  • " else: out += "
  • " return out def tmpl_submit_home_catalogs_doctype(self, ln, doc): """ Recursive function that produces a catalog's HTML display Parameters: - 'ln' *string* - The language to display the interface in - 'doc' *array* - A catalog of documents, with the properties: - 'id' - the internal id - 'name' - the name """ # load the right message language _ = gettext_set_language(ln) return """
  • %s
  • """ % create_html_link('%s/submit' % CFG_SITE_URL, {'doctype' : doc['id'], 'ln' : ln}, doc['name']) def tmpl_action_page(self, ln, uid, pid, now, doctype, description, docfulldesc, snameCateg, lnameCateg, actionShortDesc, indir, statustext): """ Recursive function that produces a catalog's HTML display Parameters: - 'ln' *string* - The language to display the interface in - 'pid' *string* - The current process id - 'now' *string* - The current time (security control features) - 'doctype' *string* - The selected doctype - 'description' *string* - The description of the doctype - 'docfulldesc' *string* - The title text of the page - 'snameCateg' *array* - The short names of all the categories of documents - 'lnameCateg' *array* - The long names of all the categories of documents - 'actionShortDesc' *array* - The short names (codes) for the different actions - 'indir' *array* - The directories for each of the actions - 'statustext' *array* - The names of the different action buttons """ # load the right message language _ = gettext_set_language(ln) out = "" out += """

    %(continue_explain)s
    Access Number:

    """ % { 'continue_explain' : _("To continue with a previously interrupted submission, enter an access number into the box below:"), 'doctype' : doctype, 'go' : _("GO"), 'ln' : ln, } return out def tmpl_warning_message(self, ln, msg): """ Produces a warning message for the specified text Parameters: - 'ln' *string* - The language to display the interface in - 'msg' *string* - The message to display """ # load the right message language _ = gettext_set_language(ln) return """
    %s
    """ % msg def tmpl_page_interface(self, ln, docname, actname, curpage, nbpages, nextPg, access, nbPg, doctype, act, fields, javascript, mainmenu): """ Produces a page with the specified fields (in the submit chain) Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The document type - 'docname' *string* - The document type name - 'actname' *string* - The action name - 'act' *string* - The action - 'curpage' *int* - The current page of submitting engine - 'nbpages' *int* - The total number of pages - 'nextPg' *int* - The next page - 'access' *string* - The submission number - 'nbPg' *string* - ?? - 'fields' *array* - the fields to display in the page, with each record having the structure: - 'fullDesc' *string* - the description of the field - 'text' *string* - the HTML code of the field - 'javascript' *string* - if the field has some associated javascript code - 'type' *string* - the type of field (T, F, I, H, D, S, R) - 'name' *string* - the name of the field - 'rows' *string* - the number of rows for textareas - 'cols' *string* - the number of columns for textareas - 'val' *string* - the default value of the field - 'size' *string* - the size for text fields - 'maxlength' *string* - the maximum length for text fields - 'htmlcode' *string* - the complete HTML code for user-defined fields - 'typename' *string* - the long name of the type - 'javascript' *string* - the javascript code to insert in the page - 'mainmenu' *string* - the url of the main menu """ # load the right message language _ = gettext_set_language(ln) # top menu out = """
    \n" # Display the navigation cell # Display "previous page" navigation arrows out += """
    %(docname)s   %(actname)s  """ % { 'docname' : docname, 'actname' : actname, } for i in range(1, nbpages+1): if i == int(curpage): out += """""" % curpage else: out += """""" % (i, i) out += """
       page: %s  %s   
     %(summary)s(2) 

    """ % { 'summary' : _("SUMMARY"), 'doctype' : cgi.escape(doctype), 'act' : cgi.escape(act), 'access' : cgi.escape(access), 'nextPg' : cgi.escape(nextPg), 'curpage' : cgi.escape(curpage), 'nbPg' : cgi.escape(nbPg), 'ln' : cgi.escape(ln), } for field in fields: if field['javascript']: out += """ """ % field['javascript'] # now displays the html form field(s) out += "%s\n%s\n" % (field['fullDesc'], field['text']) out += javascript out += "
     
     
    """ if int(curpage) != 1: out += """ """ % { 'prpage' : int(curpage) - 1, 'images' : CFG_SITE_URL + '/img', 'prevpage' : _("Previous page"), } else: out += """ """ # Display the submission number out += """ \n""" % { 'submission' : _("Submission number") + '(1)', 'access' : cgi.escape(access), } # Display the "next page" navigation arrow if int(curpage) != int(nbpages): out += """ """ % { 'nxpage' : int(curpage) + 1, 'images' : CFG_SITE_URL + '/img', 'nextpage' : _("Next page"), } else: out += """ """ out += """
      %(prevpage)s %(prevpage)s  %(submission)s: %(access)s %(nextpage)s %(nextpage)s  


    %(back)s


    %(take_note)s
    %(explain_summary)s
    """ % { 'surequit' : _("Are you sure you want to quit this submission?"), 'check_not_already_enabled': CFG_WEBSUBMIT_CHECK_USER_LEAVES_SUBMISSION and 'false' or 'true', 'back' : _("Back to main menu"), 'mainmenu' : cgi.escape(mainmenu), 'images' : CFG_SITE_URL + '/img', 'take_note' : '(1) ' + _("This is your submission access number. It can be used to continue with an interrupted submission in case of problems."), 'explain_summary' : '(2) ' + _("Mandatory fields appear in red in the SUMMARY window."), } return out def tmpl_submit_field(self, ln, field): """ Produces the HTML code for the specified field Parameters: - 'ln' *string* - The language to display the interface in - 'field' *array* - the field to display in the page, with the following structure: - 'javascript' *string* - if the field has some associated javascript code - 'type' *string* - the type of field (T, F, I, H, D, S, R) - 'name' *string* - the name of the field - 'rows' *string* - the number of rows for textareas - 'cols' *string* - the number of columns for textareas - 'val' *string* - the default value of the field - 'size' *string* - the size for text fields - 'maxlength' *string* - the maximum length for text fields - 'htmlcode' *string* - the complete HTML code for user-defined fields - 'typename' *string* - the long name of the type """ # load the right message language _ = gettext_set_language(ln) # If the field is a textarea if field['type'] == 'T': ## Field is a textarea: text = "" \ % (field['name'], field['rows'], field['cols'], cgi.escape(str(field['val']), 1)) # If the field is a file upload elif field['type'] == 'F': ## the field is a file input: text = """""" \ % (field['name'], field['size'], "%s" \ % ((field['maxlength'] in (0, None) and " ") or (""" maxlength="%s\"""" % field['maxlength'])) ) # If the field is a text input elif field['type'] == 'I': ## Field is a text input: text = """""" \ % (field['name'], field['size'], field['val'], "%s" \ % ((field['maxlength'] in (0, 
None) and " ") or (""" maxlength="%s\"""" % field['maxlength'])) ) # If the field is a hidden input elif field['type'] == 'H': text = "" % (field['name'], field['val']) # If the field is user-defined elif field['type'] == 'D': text = field['htmlcode'] # If the field is a select box elif field['type'] == 'S': text = field['htmlcode'] # If the field type is not recognized else: text = "%s: unknown field type" % field['typename'] return text def tmpl_page_interface_js(self, ln, upload, field, fieldhtml, txt, check, level, curdir, values, select, radio, curpage, nbpages, returnto): """ Produces the javascript for validation and value filling for a submit interface page Parameters: - 'ln' *string* - The language to display the interface in - 'upload' *array* - booleans if the field is a field - 'field' *array* - the fields' names - 'fieldhtml' *array* - the fields' HTML representation - 'txt' *array* - the fields' long name - 'check' *array* - if the fields should be checked (in javascript) - 'level' *array* - strings, if the fields should be filled (M) or not (O) - 'curdir' *array* - the current directory of the submission - 'values' *array* - the current values of the fields - 'select' *array* - booleans, if the controls are "select" controls - 'radio' *array* - booleans, if the controls are "radio" controls - 'curpage' *int* - the current page - 'nbpages' *int* - the total number of pages - 'returnto' *array* - a structure with 'field' and 'page', if a mandatory field on antoher page was not completed """ # load the right message language _ = gettext_set_language(ln) nbFields = len(upload) # if there is a file upload field, we change the encoding type out = """""" return out def tmpl_page_do_not_leave_submission_js(self, ln, enabled=CFG_WEBSUBMIT_CHECK_USER_LEAVES_SUBMISSION): """ Code to ask user confirmation when leaving the page, so that the submission is not interrupted by mistake. 
All submission functions should set the Javascript variable 'user_must_confirm_before_leaving_page' to 'false' before programmatically submitting the submission form. Parameters: - 'ln' *string* - The language to display the interface in - 'enabled' *bool* - If the check applies or not """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' % (enabled and 'true' or 'false', _('Your modifications will not be saved.').replace('"', '\\"')) return out def tmpl_page_endaction(self, ln, nextPg, startPg, access, curpage, nbPg, nbpages, doctype, act, docname, actname, mainmenu, finished, function_content, next_action): """ Produces the pages after all the fields have been submitted. Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The document type - 'act' *string* - The action - 'docname' *string* - The document type name - 'actname' *string* - The action name - 'curpage' *int* - The current page of submitting engine - 'startPg' *int* - The start page - 'nextPg' *int* - The next page - 'access' *string* - The submission number - 'nbPg' *string* - total number of pages - 'nbpages' *string* - number of pages (?) - 'mainmenu' *string* - the url of the main menu - 'finished' *bool* - if the submission is finished - 'function_content' *string* - HTML code produced by some function executed - 'next_action' *string* - if there is another action to be completed, the HTML code for linking to it """ # load the right message language _ = gettext_set_language(ln) out = """
    """ % { 'finished' : _("finished!"), } else: for i in range(1, nbpages + 1): out += """""" % (i,i) + %s """ % (i, i) out += """
    %(docname)s   %(actname)s  """ % { 'nextPg' : cgi.escape(nextPg), 'startPg' : cgi.escape(startPg), 'access' : cgi.escape(access), 'curpage' : cgi.escape(curpage), 'nbPg' : cgi.escape(nbPg), 'doctype' : cgi.escape(doctype), 'act' : cgi.escape(act), 'docname' : docname, 'actname' : actname, 'mainmenu' : cgi.escape(mainmenu), 'ln' : cgi.escape(ln), } if finished == 1: out += """
      %(finished)s   
       - %s %(end_action)s  
     %(summary)s(2) """ % { 'end_action' : _("end of action"), 'summary' : _("SUMMARY"), 'doctype' : cgi.escape(doctype), 'act' : cgi.escape(act), 'access' : cgi.escape(access), 'ln' : cgi.escape(ln), } out += """

    %(function_content)s %(next_action)s

    """ % { 'function_content' : function_content, 'next_action' : next_action, } if finished == 0: out += """%(submission)s²: %(access)s""" % { 'submission' : _("Submission no"), 'access' : cgi.escape(access), } else: out += " \n" out += """


    """ # Add the "back to main menu" button if finished == 0: out += """ %(back)s

    """ % { 'surequit' : _("Are you sure you want to quit this submission?"), 'back' : _("Back to main menu"), 'images' : CFG_SITE_URL + '/img', 'mainmenu' : cgi.escape(mainmenu), 'check_not_already_enabled': CFG_WEBSUBMIT_CHECK_USER_LEAVES_SUBMISSION and 'false' or 'true', } else: out += """ %(back)s

    """ % { 'back' : _("Back to main menu"), 'images' : CFG_SITE_URL + '/img', 'mainmenu' : cgi.escape(mainmenu), } return out def tmpl_function_output(self, ln, display_on, action, doctype, step, functions): """ Produces the output of the functions. Parameters: - 'ln' *string* - The language to display the interface in - 'display_on' *bool* - If debug information should be displayed - 'doctype' *string* - The document type - 'action' *string* - The action - 'step' *int* - The current step in submission - 'functions' *aray* - HTML code produced by functions executed and informations about the functions - 'name' *string* - the name of the function - 'score' *string* - the score of the function - 'error' *bool* - if the function execution produced errors - 'text' *string* - the HTML code produced by the function """ # load the right message language _ = gettext_set_language(ln) out = "" if display_on: out += """

    %(function_list)s

    """ % { 'function_list' : _("Here is the %(x_action)s function list for %(x_doctype)s documents at level %(x_step)s") % { 'x_action' : action, 'x_doctype' : doctype, 'x_step' : step, }, 'function' : _("Function"), 'score' : _("Score"), 'running' : _("Running function"), } for function in functions: out += """""" % { 'name' : function['name'], 'score' : function['score'], 'result' : function['error'] and (_("Function %s does not exist.") % function['name'] + "
    ") or function['text'] } out += "
    %(function)s%(score)s%(running)s
    %(name)s%(score)s%(result)s
    " else: for function in functions: if not function['error']: out += function['text'] return out def tmpl_next_action(self, ln, actions): """ Produces the output of the functions. Parameters: - 'ln' *string* - The language to display the interface in - 'actions' *array* - The actions to display, in the structure - 'page' *string* - the starting page - 'action' *string* - the action (in terms of submission) - 'doctype' *string* - the doctype - 'nextdir' *string* - the path to the submission data - 'access' *string* - the submission number - 'indir' *string* - ?? - 'name' *string* - the name of the action """ # load the right message language _ = gettext_set_language(ln) out = "

    %(haveto)s

      " % { 'haveto' : _("You must now"), } i = 0 for action in actions: if i > 0: out += " " + _("or") + " " i += 1 out += """
    • %(name)s
    • """ % action out += "
    " return out - def tmpl_filelist(self, ln, filelist='', recid='', docname='', version=''): - """ - Displays the file list for a record. - - Parameters: - - - 'ln' *string* - The language to display the interface in - - - 'recid' *int* - The record id - - - 'docname' *string* - The document name - - - 'version' *int* - The version of the document - - - 'filelist' *string* - The HTML string of the filelist (produced by the BibDoc classes) - """ - - # load the right message language - _ = gettext_set_language(ln) - - title = _("record") + ' #' + '%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, recid) - if docname != "": - title += ' ' + _("document") + ' #' + str(docname) - if version != "": - title += ' ' + _("version") + ' #' + str(version) - - out = """
    - - %s -
    - """ % (filelist) - - return out - - def tmpl_bibrecdoc_filelist(self, ln, types, verbose_files=''): - """ - Displays the file list for a record. - - Parameters: - - - 'ln' *string* - The language to display the interface in - - - 'types' *array* - The different types to display, each record in the format: - - - 'name' *string* - The name of the format - - - 'content' *array of string* - The HTML code produced by tmpl_bibdoc_filelist, for the right files - - - 'verbose_files' - A string representing in a verbose way the - file information. - """ - - # load the right message language - _ = gettext_set_language(ln) - - out = "" - for mytype in types: - if mytype['name']: - if not (CFG_INSPIRE_SITE and mytype['name'] == 'INSPIRE-PUBLIC'): - out += "%s %s:" % (mytype['name'], _("file(s)")) - out += "
      " - for content in mytype['content']: - out += content - out += "
    " - if verbose_files: - out += "
    %s
    " % verbose_files - return out - - def tmpl_bibdoc_filelist(self, ln, versions=[], imageurl='', recid='', docname='', status=''): - """ - Displays the file list for a record. - - Parameters: - - - 'ln' *string* - The language to display the interface in - - - 'versions' *array* - The different versions to display, each record in the format: - - - 'version' *string* - The version - - - 'content' *string* - The HTML code produced by tmpl_bibdocfile_filelist, for the right file - - - 'previous' *bool* - If the file has previous versions - - - 'imageurl' *string* - The URL to the file image - - - 'recid' *int* - The record id - - - 'docname' *string* - The name of the document - - - 'status' *string* - The status of a document - """ - - # load the right message language - _ = gettext_set_language(ln) - - out = """ - %(restriction_label)s - - - """ % { - 'imageurl' : imageurl, - 'docname' : docname, - 'restriction_label': status and ('' % _('Restricted')) or '' - } - for version in versions: - if version['previous']: - versiontext = """
    (%(see)s %(previous)s)""" % { - 'see' : _("see"), - 'siteurl' : CFG_SITE_URL, - 'CFG_SITE_RECORD': CFG_SITE_RECORD, - 'docname' : urllib.quote(docname), - 'recID': recid, - 'previous': _("previous"), - 'ln_link': (ln != CFG_SITE_LANG and '&ln=' + ln) or '', - } - else: - versiontext = "" - out += """ - - " - out += "" - return out - - def tmpl_bibdocfile_filelist(self, ln, recid, name, version, md, superformat, subformat, nice_size, description): - """ - Displays a file in the file list. - - Parameters: - - - 'ln' *string* - The language to display the interface in - - - 'recid' *int* - The id of the record - - - 'name' *string* - The name of the file - - - 'version' *string* - The version - - - 'md' *datetime* - the modification date - - - 'superformat' *string* - The display superformat - - - 'subformat' *string* - The display subformat - - - 'nice_size' *string* - The nice_size of the file - - - 'description' *string* - The description that might have been associated - to the particular file - """ - - # load the right message language - _ = gettext_set_language(ln) - - urlbase = '%s/%s/%s/files/%s' % ( - CFG_SITE_URL, - CFG_SITE_RECORD, - recid, - '%s%s' % (name, superformat)) - - urlargd = {'version' : version} - if subformat: - urlargd['subformat'] = subformat - - link_label = '%s%s' % (name, superformat) - if subformat: - link_label += ' (%s)' % subformat - - link = create_html_link(urlbase, urlargd, cgi.escape(link_label)) - - return """ - - %(link)s - - - [%(nice_size)s] - %(md)s - - %(description)s - """ % { - 'link' : link, - 'nice_size' : nice_size, - 'md' : convert_datestruct_to_dategui(md.timetuple(), ln), - 'description' : cgi.escape(description), - } - def tmpl_submit_summary (self, ln, values): """ Displays the summary for the submit procedure. Parameters: - 'ln' *string* - The language to display the interface in - 'values' *array* - The values of submit. 
Each of the records contain the following fields: - 'name' *string* - The name of the field - 'mandatory' *bool* - If the field is mandatory or not - 'value' *string* - The inserted value - 'page' *int* - The submit page on which the field is entered """ # load the right message language _ = gettext_set_language(ln) out = """""" % \ { 'images' : CFG_SITE_URL + '/img' } for value in values: if value['mandatory']: color = "red" else: color = "" out += """""" % { 'color' : color, 'name' : value['name'], 'value' : value['value'], 'page' : value['page'], 'ln' : ln } out += "
    %(name)s %(value)s
    " return out def tmpl_yoursubmissions(self, ln, order, doctypes, submissions): """ Displays the list of the user's submissions. Parameters: - 'ln' *string* - The language to display the interface in - 'order' *string* - The ordering parameter - 'doctypes' *array* - All the available doctypes, in structures: - 'id' *string* - The doctype id - 'name' *string* - The display name of the doctype - 'selected' *bool* - If the doctype should be selected - 'submissions' *array* - The available submissions, in structures: - 'docname' *string* - The document name - 'actname' *string* - The action name - 'status' *string* - The status of the document - 'cdate' *string* - Creation date - 'mdate' *string* - Modification date - 'id' *string* - The id of the submission - 'reference' *string* - The display name of the doctype - 'pending' *bool* - If the submission is pending - 'act' *string* - The action code - 'doctype' *string* - The doctype code """ # load the right message language _ = gettext_set_language(ln) out = "" out += """
    " return out def tmpl_yourapprovals(self, ln, referees): """ Displays the doctypes and categories for which the user is referee Parameters: - 'ln' *string* - The language to display the interface in - 'referees' *array* - All the doctypes for which the user is referee: - 'doctype' *string* - The doctype - 'docname' *string* - The display name of the doctype - 'categories' *array* - The specific categories for which the user is referee: - 'id' *string* - The category id - 'name' *string* - The display name of the category """ # load the right message language _ = gettext_set_language(ln) out = """ " out += '''

    To see the status of documents for which approval has been requested, click here

    ''' % {'url' : CFG_SITE_URL} return out def tmpl_publiline_selectdoctype(self, ln, docs): """ Displays the doctypes that the user can select Parameters: - 'ln' *string* - The language to display the interface in - 'docs' *array* - All the doctypes that the user can select: - 'doctype' *string* - The doctype - 'docname' *string* - The display name of the doctype """ # load the right message language _ = gettext_set_language(ln) out = """ %s""" % (ln, _("Go to specific approval workflow")) return out def tmpl_publiline_selectcplxdoctype(self, ln, docs): """ Displays the doctypes that the user can select in a complex workflow Parameters: - 'ln' *string* - The language to display the interface in - 'docs' *array* - All the doctypes that the user can select: - 'doctype' *string* - The doctype - 'docname' *string* - The display name of the doctype """ # load the right message language _ = gettext_set_language(ln) out = """
    """ return out def tmpl_publiline_selectcateg(self, ln, doctype, title, categories): """ Displays the categories from a doctype that the user can select Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The doctype - 'title' *string* - The doctype name - 'categories' *array* - All the categories that the user can select: - 'id' *string* - The id of the category - 'waiting' *int* - The number of documents waiting - 'approved' *int* - The number of approved documents - 'rejected' *int* - The number of rejected documents """ # load the right message language _ = gettext_set_language(ln) out = """ """ % { 'key' : _("Key"), 'pending' : _("Pending"), 'images' : CFG_SITE_URL + '/img', 'waiting' : _("Waiting for approval"), 'approved' : _("Approved"), 'already_approved' : _("Already approved"), 'rejected' : _("Rejected"), 'rejected_text' : _("Rejected"), 'somepending' : _("Some documents are pending."), } return out def tmpl_publiline_selectcplxcateg(self, ln, doctype, title, types): """ Displays the categories from a doctype that the user can select Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The doctype - 'title' *string* - The doctype name - 'categories' *array* - All the categories that the user can select: - 'id' *string* - The id of the category - 'waiting' *int* - The number of documents waiting - 'approved' *int* - The number of approved documents - 'rejected' *int* - The number of rejected documents """ # load the right message language _ = gettext_set_language(ln) out = "" #out = """ # # # # #
    # # """ % { # 'title' : title, # 'list_type' : _("List of specific approvals"), # } columns = [] columns.append ({'apptype' : 'RRP', 'list_categ' : _("List of refereing categories"), 'id_form' : 0, }) #columns.append ({'apptype' : 'RPB', # 'list_categ' : _("List of publication categories"), # 'id_form' : 1, # }) #columns.append ({'apptype' : 'RDA', # 'list_categ' : _("List of direct approval categories"), # 'id_form' : 2, # }) for column in columns: out += """ """ # Key out += """ """ % { 'key' : _("Key"), 'pending' : _("Pending"), 'images' : CFG_SITE_URL + '/img', 'waiting' : _("Waiting for approval"), 'approved' : _("Approved"), 'already_approved' : _("Already approved"), 'rejected' : _("Rejected"), 'rejected_text' : _("Rejected"), 'cancelled' : _("Cancelled"), 'cancelled_text' : _("Cancelled"), 'somepending' : _("Some documents are pending."), } return out def tmpl_publiline_selectdocument(self, ln, doctype, title, categ, docs): """ Displays the documents that the user can select in the specified category Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The doctype - 'title' *string* - The doctype name - 'categ' *string* - the category - 'docs' *array* - All the categories that the user can select: - 'RN' *string* - The id of the document - 'status' *string* - The status of the document """ # load the right message language _ = gettext_set_language(ln) out = """ """ return out def tmpl_publiline_selectcplxdocument(self, ln, doctype, title, categ, categname, docs, apptype): """ Displays the documents that the user can select in the specified category Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The doctype - 'title' *string* - The doctype name - 'categ' *string* - the category - 'docs' *array* - All the categories that the user can select: - 'RN' *string* - The id of the document - 'status' *string* - The status of the document - 'apptype' *string* - the approval 
type """ # load the right message language _ = gettext_set_language(ln) listtype = "" if apptype == "RRP": listtype = _("List of refereed documents") elif apptype == "RPB": listtype = _("List of publication documents") elif apptype == "RDA": listtype = _("List of direct approval documents") out = """ """ return out def tmpl_publiline_displaydoc(self, ln, doctype, docname, categ, rn, status, dFirstReq, dLastReq, dAction, access, confirm_send, auth_code, auth_message, authors, title, sysno, newrn, note): """ Displays the categories from a doctype that the user can select Parameters: - 'ln' *string* - The language to display the interface in - 'doctype' *string* - The doctype - 'docname' *string* - The doctype name - 'categ' *string* - the category - 'rn' *string* - The document RN (id number) - 'status' *string* - The status of the document - 'dFirstReq' *string* - The date of the first approval request - 'dLastReq' *string* - The date of the last approval request - 'dAction' *string* - The date of the last action (approval or rejection) - 'confirm_send' *bool* - must display a confirmation message about sending approval email - 'auth_code' *bool* - authorised to referee this document - 'auth_message' *string* - ??? - 'authors' *string* - the authors of the submission - 'title' *string* - the title of the submission - 'sysno' *string* - the unique database id for the record - 'newrn' *string* - the record number assigned to the submission - 'note' *string* - Note about the approval request. 
""" # load the right message language _ = gettext_set_language(ln) if status == "waiting": image = """""" % (CFG_SITE_URL + '/img') elif status == "approved": image = """""" % (CFG_SITE_URL + '/img') elif status == "rejected": image = """""" % (CFG_SITE_URL + '/img') else: image = "" out = """ """ return out def tmpl_publiline_displaycplxdoc(self, ln, doctype, docname, categ, rn, apptype, status, dates, isPubCom, isEdBoard, isReferee, isProjectLeader, isAuthor, authors, title, sysno, newrn): # load the right message language _ = gettext_set_language(ln) if status == "waiting": image = """""" % (CFG_SITE_URL + '/img') elif status == "approved": image = """""" % (CFG_SITE_URL + '/img') elif status == "rejected": image = """""" % (CFG_SITE_URL + '/img') elif status == "cancelled": image = """""" % (CFG_SITE_URL + '/img') else: image = "" out = """ """ return out def tmpl_publiline_displaycplxdocitem(self, doctype, categ, rn, apptype, action, comments, (user_can_view_comments, user_can_add_comment, user_can_delete_comment), selected_category, selected_topic, selected_group_id, comment_subject, comment_body, ln): _ = gettext_set_language(ln) if comments and user_can_view_comments: comments_text = '' comments_overview = '
      ' for comment in comments: (cmt_uid, cmt_nickname, cmt_title, cmt_body, cmt_date, cmt_priority, cmtid) = comment comments_overview += '
    • %s - %s (%s)
    • ' % (cmtid, cmt_nickname, cmt_title, convert_datetext_to_dategui (cmt_date)) comments_text += """
      %s - %s (%s)ReplyTop
      %s
      """ % (cmtid, cmt_nickname, cmt_title, convert_datetext_to_dategui (cmt_date), CFG_SITE_URL, doctype, apptype, categ, rn, cmt_uid, ln, email_quoted_txt2html(cmt_body)) comments_overview += '
    ' else: comments_text = '' comments_overview = 'None.' body = '' if user_can_view_comments: body += """

    %(comments_label)s

    """ if user_can_view_comments: body += """%(comments)s""" if user_can_add_comment: validation = """ """ % {'button_label': _("Add Comment")} body += self.tmpl_publiline_displaywritecomment (doctype, categ, rn, apptype, action, _("Add Comment"), comment_subject, validation, comment_body, ln) body %= { 'comments_label': _("Comments"), 'action': action, 'button_label': _("Write a comment"), 'comments': comments_text} content = '
    ' out = """

    %(comments_overview_label)s

    %(comments_overview)s
    %(body)s
    """ % { 'comments_overview_label' : _('Comments overview'), 'comments_overview' : comments_overview, 'body' : body,} return out def tmpl_publiline_displaywritecomment(self, doctype, categ, rn, apptype, action, write_label, title, validation, reply_message, ln): _ = gettext_set_language(ln) return """

    %(write_label)s

    %(title_label)s:

    %(comment_label)s:


    %(validation)s
    """ % {'write_label': write_label, 'title_label': _("Title"), 'title': title, 'comment_label': _("Comment"), 'rn' : rn, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'validation' : validation, 'reply_message' : reply_message, 'ln' : ln, } def tmpl_publiline_displaydocplxaction(self, ln, doctype, categ, rn, apptype, action, status, authors, title, sysno, subtitle1, email_user_pattern, stopon1, users, extrausers, stopon2, subtitle2, usersremove, stopon3, validate_btn): # load the right message language _ = gettext_set_language(ln) if status == "waiting": image = """""" % (CFG_SITE_URL + '/img') elif status == "approved": image = """""" % (CFG_SITE_URL + '/img') elif status == "rejected": image = """""" % (CFG_SITE_URL + '/img') else: image = "" out = """ """ if ((apptype == "RRP") or (apptype == "RPB")) and ((action == "EdBoardSel") or (action == "RefereeSel")): out += """ """ if action == "EdBoardSel": out += """ """ if validate_btn != "": out += """
    """ % { 'rn' : rn, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'validate_btn' : validate_btn, 'ln': ln, } return out def tmpl_publiline_displaycplxrecom(self, ln, doctype, categ, rn, apptype, action, status, authors, title, sysno, msg_to, msg_to_group, msg_subject): # load the right message language _ = gettext_set_language(ln) if status == "waiting": image = """""" % (CFG_SITE_URL + '/img') elif status == "approved": image = """""" % (CFG_SITE_URL + '/img') elif status == "rejected": image = """""" % (CFG_SITE_URL + '/img') else: image = "" out = """ """ # escape forbidden character msg_to = escape_html(msg_to) msg_to_group = escape_html(msg_to_group) msg_subject = escape_html(msg_subject) write_box = """
    """ if msg_to != "": addr_box = """ """ % {'users_label': _("User"), 'to_users' : msg_to, } if msg_to_group != "": addr_box += """ """ % {'groups_label': _("Group"), 'to_groups': msg_to_group, } elif msg_to_group != "": addr_box = """ """ % {'groups_label': _("Group"), 'to_groups': msg_to_group, } else: addr_box = """ """ write_box += addr_box write_box += """
    %(to_label)s%(users_label)s %(to_users)s
      %(groups_label)s %(to_groups)s%(groups_label)s %(to_groups)s   
         
    %(subject_label)s
    %(message_label)s
    """ write_box = write_box % {'rn' : rn, 'categ' : categ, 'doctype' : doctype, 'apptype' : apptype, 'action' : action, 'subject' : msg_subject, 'to_label': _("To:"), 'subject_label': _("Subject:"), 'message_label': _("Message:"), 'send_label': _("SEND"), 'select' : _("Select:"), 'approve' : _("approve"), 'reject' : _("reject"), 'ln': ln, } out += write_box return out def displaycplxdoc_displayauthaction(action, linkText): return """ (%(linkText)s)""" % { "action" : action, "linkText" : linkText } diff --git a/modules/websubmit/lib/websubmit_web_tests.py b/modules/websubmit/lib/websubmit_web_tests.py index e40f7858f..f7c2d1f2c 100644 --- a/modules/websubmit/lib/websubmit_web_tests.py +++ b/modules/websubmit/lib/websubmit_web_tests.py @@ -1,235 +1,235 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""WebSubmit module web tests.""" from invenio.config import CFG_SITE_SECURE_URL from invenio.testutils import make_test_suite, \ run_test_suite, \ InvenioWebTestCase class InvenioWebSubmitWebTest(InvenioWebTestCase): """WebSubmit web tests.""" def test_submit_article(self): """websubmit - web test submit an article""" self.browser.get(CFG_SITE_SECURE_URL) # login as jekyll self.login(username="jekyll", password="j123ekyll") self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.find_element_by_link_text_with_timeout("Demo Article Submission") self.browser.find_element_by_link_text("Demo Article Submission").click() self.find_element_by_id_with_timeout("comboARTICLE") self.browser.find_element_by_id("comboARTICLE").click() self.find_element_by_xpath_with_timeout("//input[@value='Submit New Record']") self.browser.find_element_by_xpath("//input[@value='Submit New Record']").click() self.fill_textbox(textbox_name="DEMOART_REP", text="Test-Ref-001\nTest-Ref-002") self.fill_textbox(textbox_name="DEMOART_TITLE", text="Test article document title") self.fill_textbox(textbox_name="DEMOART_AU", text="Author1, Firstname1\nAuthor2, Firstname2") self.fill_textbox(textbox_name="DEMOART_ABS", text="This is a test abstract.\nIt has some more lines.\n\n...and some empty lines.\n\nAnd it finishes here.") self.fill_textbox(textbox_name="DEMOART_NUMP", text="1234") self.choose_selectbox_option_by_label(selectbox_name="DEMOART_LANG", label="French") self.fill_textbox(textbox_name="DEMOART_DATE", text="11/01/2001") self.fill_textbox(textbox_name="DEMOART_KW", text="test keyword1\ntest keyword2\ntest keyword3") self.fill_textbox(textbox_name="DEMOART_NOTE", text="I don't think I have any additional comments.\nBut maybe I'll input some quotes here: \" ' ` and the rest.") self.fill_textbox(textbox_name="DEMOART_FILE", text="/opt/invenio/lib/webtest/invenio/test.pdf") self.find_element_by_name_with_timeout("endS") 
self.browser.find_element_by_name("endS").click() self.page_source_test(expected_text=['Submission Complete!', \ 'Your document has the following reference(s): DEMO-ARTICLE-']) self.logout() def test_submit_book(self): """websubmit - web test submit a book""" - + self.browser.get(CFG_SITE_SECURE_URL) # login as jekyll self.login( username="jekyll", password="j123ekyll") self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.find_element_by_link_text_with_timeout("Demo Book Submission (Refereed)") self.browser.find_element_by_link_text("Demo Book Submission (Refereed)").click() self.find_element_by_xpath_with_timeout("//input[@value='Submit New Record']") self.browser.find_element_by_xpath("//input[@value='Submit New Record']").click() self.fill_textbox(textbox_name="DEMOBOO_REP", text="test-bk-ref-1\ntest-bk-ref-2") self.fill_textbox(textbox_name="DEMOBOO_TITLE", text="Test book title") self.fill_textbox(textbox_name="DEMOBOO_AU", text="Doe, John") self.fill_textbox(textbox_name="DEMOBOO_ABS", text="This is a test abstract of this test book record.") self.fill_textbox(textbox_name="DEMOBOO_NUMP", text="20") self.choose_selectbox_option_by_label(selectbox_name="DEMOBOO_LANG", label="English") self.fill_textbox(textbox_name="DEMOBOO_DATE", text="10/01/2001") self.fill_textbox(textbox_name="DEMOBOO_KW", text="test keyword 1\ntest keyword 2") self.fill_textbox(textbox_name="DEMOBOO_NOTE", text="No additional notes.") self.find_element_by_name_with_timeout("endS") self.browser.find_element_by_name("endS").click() self.page_source_test(expected_text=['Submission Complete!', \ 'Your document has the following reference(s): DEMO-BOOK-', \ 'An email has been sent to the referee.']) self.logout() def test_submit_book_approval(self): """websubmit - web test submit a book approval""" import time year = time.localtime().tm_year self.browser.get(CFG_SITE_SECURE_URL) # login as hyde self.login(username="hyde", 
password="h123yde") self.browser.get(CFG_SITE_SECURE_URL + "/yourapprovals.py") self.page_source_test(expected_text='You are not authorized to use approval system.') self.browser.get(CFG_SITE_SECURE_URL + "/publiline.py?doctype=DEMOBOO") self.browser.find_element_by_link_text("DEMO-BOOK-%s-001" % str(year)).click() self.page_source_test(unexpected_text='As a referee for this document, you may click this button to approve or reject it') self.logout() # login as dorian self.login(username="dorian", password="d123orian") self.find_element_by_link_text_with_timeout("your approvals") self.browser.find_element_by_link_text("your approvals").click() self.page_source_test(expected_text='You are a general referee') self.find_element_by_link_text_with_timeout("You are a general referee") self.browser.find_element_by_link_text("You are a general referee").click() self.page_source_test(expected_text='DEMO-BOOK-') self.browser.find_element_by_link_text("DEMO-BOOK-%s-001" % str(year)).click() self.page_source_test(expected_text=['Approval and Refereeing Workflow', \ 'The record you are trying to access', \ 'It is currently restricted for security reasons']) self.logout() def test_submit_journal(self): """websubmit - web test submit a journal""" self.browser.get(CFG_SITE_SECURE_URL + "/submit?doctype=DEMOJRN") # login as jekyll self.login(username="jekyll", password="j123ekyll") self.browser.get(CFG_SITE_SECURE_URL + "/submit?doctype=DEMOJRN") self.page_source_test(unexpected_text='Arts') self.browser.get(CFG_SITE_SECURE_URL) self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.page_source_test(unexpected_text='Demo Journal Submission') self.logout() # login as romeo self.login(username="romeo", password="r123omeo") self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.find_element_by_link_text_with_timeout("Demo Journal Submission") 
self.browser.find_element_by_link_text("Demo Journal Submission").click() self.find_element_by_id_with_timeout("comboARTS") self.browser.find_element_by_id("comboARTS").click() self.find_element_by_xpath_with_timeout("//input[@value='Submit New Record']") self.browser.find_element_by_xpath("//input[@value='Submit New Record']").click() self.choose_selectbox_option_by_label(selectbox_name="DEMOJRN_TYPE", label="Offline") self.fill_textbox(textbox_name="DEMOJRN_ORDER1", text="1") self.fill_textbox(textbox_name="DEMOJRN_ORDER2", text="1") self.fill_textbox(textbox_name="DEMOJRN_AU", text="Author1, Firstname1\nAuthor2, Firstname2") self.fill_textbox(textbox_name="DEMOJRN_TITLEE", text="This is a test title") self.fill_textbox(textbox_name="DEMOJRN_TITLEF", text="Ceci est un titre test") self.find_element_by_name_with_timeout("endS") self.browser.find_element_by_name("endS").click() self.page_source_test(expected_text=['Submission Complete!', \ 'Your document has the following reference(s): BUL-ARTS-']) self.logout() def test_submit_poetry(self): """websubmit - web test submit a poem""" self.browser.get(CFG_SITE_SECURE_URL) # login as jekyll self.login(username="jekyll", password="j123ekyll") self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.find_element_by_link_text_with_timeout("Demo Poetry Submission") self.browser.find_element_by_link_text("Demo Poetry Submission").click() self.find_element_by_xpath_with_timeout("//input[@value='Submit New Record']") self.browser.find_element_by_xpath("//input[@value='Submit New Record']").click() self.fill_textbox(textbox_name="DEMOPOE_TITLE", text="A test poem") self.fill_textbox(textbox_name="DEMOPOE_AU", text="Doe, John") self.choose_selectbox_option_by_label(selectbox_name="DEMOPOE_LANG", label="Slovak") self.fill_textbox(textbox_name="DEMOPOE_YEAR", text="1234") self.find_element_by_xpath_with_timeout("//strong/font") 
self.browser.find_element_by_xpath("//strong/font").click() self.fill_textbox(textbox_name="DEMOPOE_ABS", text=u"This is a test poem
    \na test poem indeed
    \nwith some accented characters
    \n
    \nΕλληνικά
    \n日本語
    \nEspañol") self.find_element_by_name_with_timeout("endS") self.browser.find_element_by_name("endS").click() self.page_source_test(expected_text=['Submission Complete!', \ 'Your document has the following reference(s): DEMO-POETRY-']) self.logout() def test_submit_tar_gz(self): """websubmit - web test submit an article with a tar.gz file """ self.browser.get(CFG_SITE_SECURE_URL) # login as jekyll self.login(username="jekyll", password="j123ekyll") self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.find_element_by_link_text_with_timeout("Demo Article Submission") self.browser.find_element_by_link_text("Demo Article Submission").click() self.find_element_by_id_with_timeout("comboARTICLE") self.browser.find_element_by_id("comboARTICLE").click() self.find_element_by_xpath_with_timeout("//input[@value='Submit New Record']") self.browser.find_element_by_xpath("//input[@value='Submit New Record']").click() self.fill_textbox(textbox_name="DEMOART_REP", text="Test-Ref-001\nTest-Ref-002") self.fill_textbox(textbox_name="DEMOART_TITLE", text="Test article tar gz document title") self.fill_textbox(textbox_name="DEMOART_AU", text="Author1, Firstname1\nAuthor2, Firstname2") self.fill_textbox(textbox_name="DEMOART_ABS", text="This is a test abstract.\nIt has some more lines.\n\n...and some empty lines.\n\nAnd it finishes here.") self.fill_textbox(textbox_name="DEMOART_NUMP", text="1234") self.choose_selectbox_option_by_label(selectbox_name="DEMOART_LANG", label="French") self.fill_textbox(textbox_name="DEMOART_DATE", text="11/01/2001") self.fill_textbox(textbox_name="DEMOART_KW", text="test keyword1\ntest keyword2\ntest keyword3") self.fill_textbox(textbox_name="DEMOART_NOTE", text="I don't think I have any additional comments.\nBut maybe I'll input some quotes here: \" ' ` and the rest.") self.fill_textbox(textbox_name="DEMOART_FILE", text="/opt/invenio/lib/webtest/invenio/test.tar.gz") 
self.find_element_by_name_with_timeout("endS") self.browser.find_element_by_name("endS").click() self.page_source_test(expected_text=['Submission Complete!', \ 'Your document has the following reference(s): DEMO-ARTICLE-']) self.logout() def test_submit_article_guest(self): """websubmit - web test submit an article as a guest""" self.browser.get(CFG_SITE_SECURE_URL) self.find_element_by_link_text_with_timeout("Submit") self.browser.find_element_by_link_text("Submit").click() self.find_element_by_link_text_with_timeout("Demo Article Submission") self.browser.find_element_by_link_text("Demo Article Submission").click() self.find_element_by_xpath_with_timeout("//input[@value='Submit New Record']") self.browser.find_element_by_xpath("//input[@value='Submit New Record']").click() self.fill_textbox(textbox_name="DEMOART_REP", text="Test-Ref-001\nTest-Ref-002") self.fill_textbox(textbox_name="DEMOART_TITLE", text="Test article document title") self.fill_textbox(textbox_name="DEMOART_AU", text="Author1, Firstname1\nAuthor2, Firstname2") self.fill_textbox(textbox_name="DEMOART_ABS", text="This is a test abstract.\nIt has some more lines.\n\n...and some empty lines.\n\nAnd it finishes here.") self.fill_textbox(textbox_name="DEMOART_NUMP", text="1234") self.choose_selectbox_option_by_label(selectbox_name="DEMOART_LANG", label="French") self.fill_textbox(textbox_name="DEMOART_DATE", text="11/01/2001") self.fill_textbox(textbox_name="DEMOART_KW", text="test keyword1\ntest keyword2\ntest keyword3") self.fill_textbox(textbox_name="DEMOART_NOTE", text="I don't think I have any additional comments.\nBut maybe I'll input some quotes here: \" ' ` and the rest.") self.fill_textbox(textbox_name="DEMOART_FILE", text="/opt/invenio/lib/webtest/invenio/test.pdf") self.find_element_by_name_with_timeout("endS") self.browser.find_element_by_name("endS").click() self.page_source_test(expected_text=['Submission Complete!', \ 'Your document has the following reference(s): DEMO-ARTICLE-']) 
TEST_SUITE = make_test_suite(InvenioWebSubmitWebTest, ) if __name__ == '__main__': run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/websubmit/lib/websubmit_webinterface.py b/modules/websubmit/lib/websubmit_webinterface.py index ce41d2977..361339eb9 100644 --- a/modules/websubmit/lib/websubmit_webinterface.py +++ b/modules/websubmit/lib/websubmit_webinterface.py @@ -1,1464 +1,988 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
__lastupdated__ = """$Date$""" __revision__ = "$Id$" import os import errno import time import cgi import sys import shutil from urllib import urlencode from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ - CFG_TMPSHAREDDIR, \ CFG_SITE_NAME_INTL, \ CFG_SITE_URL, \ CFG_SITE_SECURE_URL, \ CFG_WEBSUBMIT_STORAGEDIR, \ CFG_PREFIX, \ - CFG_CERN_SITE, \ - CFG_SITE_RECORD + CFG_CERN_SITE from invenio import webinterface_handler_config as apache from invenio.dbquery import run_sql -from invenio.access_control_config import VIEWRESTRCOLL -from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import acc_is_role -from invenio.webpage import page, create_error_box, pageheaderonly, \ - pagefooteronly -from invenio.webuser import getUid, page_not_authorized, collect_user_info, isUserSuperAdmin, \ +from invenio.webpage import page, create_error_box +from invenio.webuser import getUid, page_not_authorized, collect_user_info, \ isGuestUser -from invenio.websubmit_config import * -from invenio import webjournal_utils from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import make_canonical_urlargd, redirect_to_url from invenio.messages import gettext_set_language -from invenio.search_engine import \ - guess_primary_collection_of_a_record, get_colID, record_exists, \ - create_navtrail_links, check_user_can_view_record, record_empty, \ - is_user_owner_of_record -from invenio.bibdocfile import BibRecDocs, normalize_format, file_strip_ext, \ - stream_restricted_icon, BibDoc, InvenioWebSubmitFileError, stream_file, \ - decompose_file, propose_next_docname, get_subformat_from_format +from invenio.bibdocfile import stream_file, \ + decompose_file, propose_next_docname from invenio.errorlib import register_exception from invenio.htmlutils import is_html_text_editor_installed from 
invenio.websubmit_icon_creator import create_icon, InvenioWebSubmitIconCreatorError -from ckeditor_invenio_connector import process_CKEditor_upload, send_response +from invenio.ckeditor_invenio_connector import process_CKEditor_upload, send_response import invenio.template websubmit_templates = invenio.template.load('websubmit') -from invenio.websearchadminlib import get_detailed_page_tabs from invenio.session import get_session from invenio.jsonutils import json, CFG_JSON_AVAILABLE import invenio.template webstyle_templates = invenio.template.load('webstyle') websearch_templates = invenio.template.load('websearch') -from invenio.websubmit_managedocfiles import \ - create_file_upload_interface, \ - get_upload_file_interface_javascript, \ - get_upload_file_interface_css, \ - move_uploaded_files_to_storage - - -class WebInterfaceFilesPages(WebInterfaceDirectory): - - def __init__(self,recid): - self.recid = recid - - def _lookup(self, component, path): - # after ///files/ every part is used as the file - # name - filename = component - - def getfile(req, form): - args = wash_urlargd(form, websubmit_templates.files_default_urlargd) - ln = args['ln'] - - _ = gettext_set_language(ln) - - uid = getUid(req) - user_info = collect_user_info(req) - - verbose = args['verbose'] - if verbose >= 1 and not isUserSuperAdmin(user_info): - # Only SuperUser can see all the details! - verbose = 0 - - if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1: - return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), - navmenuid='submit') - - if record_exists(self.recid) < 1: - msg = "

    %s

    " % _("Requested record does not seem to exist.") - return warningMsg(msg, req, CFG_SITE_NAME, ln) - - if record_empty(self.recid): - msg = "

    %s

    " % _("Requested record does not seem to have been integrated.") - return warningMsg(msg, req, CFG_SITE_NAME, ln) - - (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid) - if auth_code and user_info['email'] == 'guest': - if webjournal_utils.is_recid_in_released_issue(self.recid): - # We can serve the file - pass - else: - cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) - target = '/youraccount/login' + \ - make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ - CFG_SITE_SECURE_URL + user_info['uri']}, {}) - return redirect_to_url(req, target, norobot=True) - elif auth_code: - if webjournal_utils.is_recid_in_released_issue(self.recid): - # We can serve the file - pass - else: - return page_not_authorized(req, "../", \ - text = auth_message) - - - readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1 - - # From now on: either the user provided a specific file - # name (and a possible version), or we return a list of - # all the available files. In no case are the docids - # visible. - try: - bibarchive = BibRecDocs(self.recid) - except InvenioWebSubmitFileError, e: - register_exception(req=req, alert_admin=True) - msg = "

    %s

    %s

    " % ( - _("The system has encountered an error in retrieving the list of files for this document."), - _("The error has been logged and will be taken in consideration as soon as possible.")) - return warningMsg(msg, req, CFG_SITE_NAME, ln) - - if bibarchive.deleted_p(): - return print_warning(req, _("Requested record does not seem to exist.")) - - docname = '' - format = '' - version = '' - warn = '' - - if filename: - # We know the complete file name, guess which docid it - # refers to - ## TODO: Change the extension system according to ext.py from setlink - ## and have a uniform extension mechanism... - docname = file_strip_ext(filename) - format = filename[len(docname):] - if format and format[0] != '.': - format = '.' + format - if args['subformat']: - format += ';%s' % args['subformat'] - else: - docname = args['docname'] - - if not format: - format = args['format'] - if args['subformat']: - format += ';%s' % args['subformat'] - - if not version: - version = args['version'] - - ## Download as attachment - is_download = False - if args['download']: - is_download = True - - # version could be either empty, or all or an integer - try: - int(version) - except ValueError: - if version != 'all': - version = '' - - display_hidden = isUserSuperAdmin(user_info) - - if version != 'all': - # search this filename in the complete list of files - for doc in bibarchive.list_bibdocs(): - if docname == doc.get_docname(): - try: - docfile = doc.get_file(format, version) - (auth_code, auth_message) = docfile.is_restricted(user_info) - if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid): - if CFG_WEBSUBMIT_ICON_SUBFORMAT_RE.match(get_subformat_from_format(format)): - return stream_restricted_icon(req) - if user_info['email'] == 'guest': - cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()}) - target = '/youraccount/login' + \ - make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ - 
CFG_SITE_SECURE_URL + user_info['uri']}, {}) - redirect_to_url(req, target) - else: - req.status = apache.HTTP_UNAUTHORIZED - warn += print_warning(_("This file is restricted: ") + auth_message) - break - - if not docfile.hidden_p(): - if not readonly: - ip = str(req.remote_ip) - res = doc.register_download(ip, version, format, uid) - try: - return docfile.stream(req, download=is_download) - except InvenioWebSubmitFileError, msg: - register_exception(req=req, alert_admin=True) - req.status = apache.HTTP_INTERNAL_SERVER_ERROR - return warningMsg(_("An error has happened in trying to stream the request file."), req, CFG_SITE_NAME, ln) - else: - req.status = apache.HTTP_UNAUTHORIZED - warn = print_warning(_("The requested file is hidden and can not be accessed.")) - - except InvenioWebSubmitFileError, msg: - register_exception(req=req, alert_admin=True) - - if docname and format and not warn: - req.status = apache.HTTP_NOT_FOUND - warn += print_warning(_("Requested file does not seem to exist.")) - filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden) - - t = warn + websubmit_templates.tmpl_filelist( - ln=ln, - recid=self.recid, - docname=args['docname'], - version=version, - filelist=filelist) - - cc = guess_primary_collection_of_a_record(self.recid) - unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid, ln) - ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] - ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) - link_ln = '' - if ln != CFG_SITE_LANG: - link_ln = '?ln=%s' % ln - tabs = [(unordered_tabs[tab_id]['label'], \ - '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln), \ - tab_id == 'files', - unordered_tabs[tab_id]['enabled']) \ - for (tab_id, order) in ordered_tabs_id - if unordered_tabs[tab_id]['visible'] == True] - top = webstyle_templates.detailed_record_container_top(self.recid, - tabs, - args['ln']) - bottom = 
webstyle_templates.detailed_record_container_bottom(self.recid, - tabs, - args['ln']) - title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln']) - return pageheaderonly(title=title, - navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \ - ''' > %s - > %s''' % \ - (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")), - - description="", - keywords="keywords", - uid=uid, - language=ln, - req=req, - navmenuid='search', - navtrail_append_title_p=0) + \ - websearch_templates.tmpl_search_pagestart(ln) + \ - top + t + bottom + \ - websearch_templates.tmpl_search_pageend(ln) + \ - pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req) - return getfile, [] - - def __call__(self, req, form): - """Called in case of URLs like /CFG_SITE_RECORD/123/files without - trailing slash. - """ - args = wash_urlargd(form, websubmit_templates.files_default_urlargd) - ln = args['ln'] - link_ln = '' - if ln != CFG_SITE_LANG: - link_ln = '?ln=%s' % ln - - return redirect_to_url(req, '%s/%s/%s/files/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, link_ln)) - -def websubmit_legacy_getfile(req, form): - """ Handle legacy /getfile.py URLs """ - - args = wash_urlargd(form, { - 'recid': (int, 0), - 'docid': (int, 0), - 'version': (str, ''), - 'name': (str, ''), - 'format': (str, ''), - 'ln' : (str, CFG_SITE_LANG) - }) - - _ = gettext_set_language(args['ln']) - - def _getfile_py(req, recid=0, docid=0, version="", name="", format="", ln=CFG_SITE_LANG): - if not recid: - ## Let's obtain the recid from the docid - if docid: - try: - bibdoc = BibDoc(docid=docid) - recid = bibdoc.get_recid() - except InvenioWebSubmitFileError, e: - return warningMsg(_("An error has happened in trying to retrieve the requested file."), req, CFG_SITE_NAME, ln) - else: - return warningMsg(_('Not enough information to retrieve the document'), req, CFG_SITE_NAME, ln) - else: - if not name and docid: - ## Let's obtain the name from 
the docid - try: - bibdoc = BibDoc(docid) - name = bibdoc.get_docname() - except InvenioWebSubmitFileError, e: - return warningMsg(_("An error has happened in trying to retrieving the requested file."), req, CFG_SITE_NAME, ln) - - format = normalize_format(format) - - redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, format, ln, version and 'version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY) - - return _getfile_py(req, **args) - - -# -------------------------------------------------- - from invenio.websubmit_engine import home, action, interface, endaction, makeCataloguesTable class WebInterfaceSubmitPages(WebInterfaceDirectory): _exports = ['summary', 'sub', 'direct', '', 'attachfile', 'uploadfile', \ - 'getuploadedfile', 'managedocfiles', 'managedocfilesasync', \ - 'upload_video', ('continue', 'continue_')] - - def managedocfiles(self, req, form): - """ - Display admin interface to manage files of a record - """ - argd = wash_urlargd(form, { - 'ln': (str, ''), - 'access': (str, ''), - 'recid': (int, None), - 'do': (int, 0), - 'cancel': (str, None), - }) - - _ = gettext_set_language(argd['ln']) - uid = getUid(req) - user_info = collect_user_info(req) - # Check authorization - (auth_code, auth_msg) = acc_authorize_action(req, - 'runbibdocfile') - if auth_code and user_info['email'] == 'guest': - # Ask to login - target = '/youraccount/login' + \ - make_canonical_urlargd({'ln' : argd['ln'], - 'referer' : CFG_SITE_SECURE_URL + user_info['uri']}, {}) - return redirect_to_url(req, target) - elif auth_code: - return page_not_authorized(req, referer="/submit/managedocfiles", - uid=uid, text=auth_msg, - ln=argd['ln'], - navmenuid="admin") - - # Prepare navtrail - navtrail = '''Admin Area > %(manage_files)s''' \ - % {'CFG_SITE_URL': CFG_SITE_URL, - 'manage_files': _("Manage Document Files")} - - body = '' - if argd['do'] != 0 and not argd['cancel']: - # Apply modifications - working_dir = 
os.path.join(CFG_TMPSHAREDDIR, - 'websubmit_upload_interface_config_' + str(uid), - argd['access']) - move_uploaded_files_to_storage(working_dir=working_dir, - recid=argd['recid'], - icon_sizes=['180>','700>'], - create_icon_doctypes=['*'], - force_file_revision=False) - # Clean temporary directory - shutil.rmtree(working_dir) - - # Confirm modifications - body += '

    %s

    ' % \ - (_('Your modifications to record #%i have been submitted') % argd['recid']) - elif argd['cancel']: - # Clean temporary directory - working_dir = os.path.join(CFG_TMPSHAREDDIR, - 'websubmit_upload_interface_config_' + str(uid), - argd['access']) - shutil.rmtree(working_dir) - body += '

    %s

    ' % \ - (_('Your modifications to record #%i have been cancelled') % argd['recid']) - - if not argd['recid'] or argd['do'] != 0: - body += ''' -
    - - - -
    - ''' % {'edit': _('Edit'), - 'edit_record': _('Edit record'), - 'CFG_SITE_URL': CFG_SITE_URL} - - access = time.strftime('%Y%m%d_%H%M%S') - if argd['recid'] and argd['do'] == 0: - # Displaying interface to manage files - # Prepare navtrail - title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, argd['recid'], - argd['ln']) - navtrail = '''Admin Area > - %(manage_files)s > - %(record)s: %(title)s - ''' \ - % {'CFG_SITE_URL': CFG_SITE_URL, - 'title': title, - 'manage_files': _("Document File Manager"), - 'record': _("Record #%i") % argd['recid']} - - body += create_file_upload_interface(\ - recid=argd['recid'], - ln=argd['ln'], - uid=uid, - sbm_access=access, - display_hidden_files=True, - restrictions_and_desc=CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS, - doctypes_and_desc=CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES, - **CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC)[1] - - body += '''
    -
    - - - - -
    - - -
    ''' % \ - {'apply_changes': _("Apply changes"), - 'cancel_changes': _("Cancel all changes"), - 'recid': argd['recid'], - 'access': access, - 'ln': argd['ln'], - 'CFG_SITE_URL': CFG_SITE_URL} - - body += websubmit_templates.tmpl_page_do_not_leave_submission_js(argd['ln'], enabled=True) - - return page(title = _("Document File Manager") + (argd['recid'] and (': ' + _("Record #%i") % argd['recid']) or ''), - navtrail=navtrail, - navtrail_append_title_p=0, - metaheaderadd = get_upload_file_interface_javascript(form_url_params='?access='+access) + \ - get_upload_file_interface_css(), - body = body, - uid = uid, - language=argd['ln'], - req=req, - navmenuid='admin') - - def managedocfilesasync(self, req, form): - "Upload file and returns upload interface" - - argd = wash_urlargd(form, { - 'ln': (str, ''), - 'recid': (int, 1), - 'doctype': (str, ''), - 'access': (str, ''), - 'indir': (str, ''), - }) - - user_info = collect_user_info(req) - include_headers = False - # User submitted either through WebSubmit, or admin interface. - if form.has_key('doctype') and form.has_key('indir') \ - and form.has_key('access'): - # Submitted through WebSubmit. Check rights - include_headers = True - working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, - argd['indir'], argd['doctype'], - argd['access']) - try: - assert(working_dir == os.path.abspath(working_dir)) - except AssertionError: - raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) - try: - # Retrieve recid from working_dir, safer. - recid_fd = file(os.path.join(working_dir, 'SN')) - recid = int(recid_fd.read()) - recid_fd.close() - except: - recid = "" - try: - act_fd = file(os.path.join(working_dir, 'act')) - action = act_fd.read() - act_fd.close() - except: - action = "" - - # Is user authorized to perform this action? 
- (auth_code, auth_msg) = acc_authorize_action(user_info, - "submit", - authorized_if_no_roles=not isGuestUser(getUid(req)), - doctype=argd['doctype'], - act=action) - if not acc_is_role("submit", doctype=argd['doctype'], act=action): - # There is NO authorization plugged. User should have access - auth_code = 0 - else: - # User must be allowed to attach files - (auth_code, auth_msg) = acc_authorize_action(user_info, - 'runbibdocfile') - recid = argd['recid'] - - if auth_code: - raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) - - return create_file_upload_interface(recid=recid, - ln=argd['ln'], - print_outside_form_tag=False, - print_envelope=False, - form=form, - include_headers=include_headers, - sbm_indir=argd['indir'], - sbm_access=argd['access'], - sbm_doctype=argd['doctype'], - uid=user_info['uid'])[1] + 'getuploadedfile', 'upload_video', ('continue', 'continue_')] def uploadfile(self, req, form): """ Similar to /submit, but only consider files. Nice for asynchronous Javascript uploads. Should be used to upload a single file. Also try to create an icon, and return URL to file(s) + icon(s) Authentication is performed based on session ID passed as parameter instead of cookie-based authentication, due to the use of this URL by the Flash plugin (to upload multiple files at once), which does not route cookies. FIXME: consider adding /deletefile and /modifyfile functions + parsing of additional parameters to rename files, add comments, restrictions, etc. 
""" argd = wash_urlargd(form, { 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), 'session_id': (str, ''), 'rename': (str, ''), }) curdir = None if not form.has_key("indir") or \ not form.has_key("doctype") or \ not form.has_key("access"): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) else: curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) user_info = collect_user_info(req) if form.has_key("session_id"): # Are we uploading using Flash, which does not transmit # cookie? The expect to receive session_id as a form # parameter. First check that IP addresses do not # mismatch. A ValueError will be raises if there is # something wrong session = get_session(req=req, sid=argd['session_id']) try: session = get_session(req=req, sid=argd['session_id']) except ValueError, e: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Retrieve user information. We cannot rely on the session here. res = run_sql("SELECT uid FROM session WHERE session_key=%s", (argd['session_id'],)) if len(res): uid = res[0][0] user_info = collect_user_info(uid) try: act_fd = file(os.path.join(curdir, 'act')) action = act_fd.read() act_fd.close() except: action = "" # Is user authorized to perform this action? 
(auth_code, auth_message) = acc_authorize_action(uid, "submit", authorized_if_no_roles=not isGuestUser(uid), verbose=0, doctype=argd['doctype'], act=action) if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0: # User cannot submit raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) else: # Process the upload and get the response added_files = {} for key, formfields in form.items(): filename = key.replace("[]", "") file_to_open = os.path.join(curdir, filename) if hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.abspath(os.path.join(curdir, 'files', str(user_info['uid']), key)) try: assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists(os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension # This may be dangerous if the file size is bigger than the available memory fp = open(os.path.join(dir_to_open, filename), "w") fp.write(formfields.file.read()) fp.close() fp = open(os.path.join(curdir, "lastuploadedfile"), "w") fp.write(filename) fp.close() fp = open(file_to_open, "w") fp.write(filename) fp.close() try: # Create icon (icon_path, icon_name) = create_icon( { 'input-file' : os.path.join(dir_to_open, filename), 'icon-name' : filename, # extension stripped automatically 'icon-file-format' : 'gif', 'multipage-icon' : False, 'multipage-icon-delay' : 100, 'icon-scale' : "300>", # Resize only if width > 300 'verbosity' : 0, }) icons_dir = os.path.join(os.path.join(curdir, 'icons', str(user_info['uid']), key)) if not os.path.exists(icons_dir): # Create uid/icons dir if needed try: os.makedirs(icons_dir) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that # directory already exists, # then continue, else report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) os.rename(os.path.join(icon_path, icon_name), os.path.join(icons_dir, icon_name)) added_files[key] = {'name': filename, 'iconName': icon_name} except InvenioWebSubmitIconCreatorError, e: # We could not create the icon added_files[key] = {'name': filename} continue else: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Send our response if CFG_JSON_AVAILABLE: return json.dumps(added_files) def upload_video(self, req, form): """ A clone of uploadfile but for (large) videos. 
Does not copy the uploaded file to the websubmit directory. Instead, the path to the file is stored inside the submission directory. """ - def gcd(a,b): + def gcd(a, b): """ the euclidean algorithm """ while a: - a, b = b%a, a + a, b = b % a, a return b from invenio.bibencode_extract import extract_frames from invenio.bibencode_config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME from invenio.bibencode_encode import determine_aspect from invenio.bibencode_utils import probe from invenio.bibencode_metadata import ffprobe_metadata from invenio.websubmit_config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX argd = wash_urlargd(form, { 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), 'session_id': (str, ''), 'rename': (str, ''), }) curdir = None if not form.has_key("indir") or \ not form.has_key("doctype") or \ not form.has_key("access"): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) else: curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) user_info = collect_user_info(req) if form.has_key("session_id"): # Are we uploading using Flash, which does not transmit # cookie? The expect to receive session_id as a form # parameter. First check that IP addresses do not # mismatch. A ValueError will be raises if there is # something wrong session = get_session(req=req, sid=argd['session_id']) try: session = get_session(req=req, sid=argd['session_id']) except ValueError, e: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Retrieve user information. We cannot rely on the session here. res = run_sql("SELECT uid FROM session WHERE session_key=%s", (argd['session_id'],)) if len(res): uid = res[0][0] user_info = collect_user_info(uid) try: act_fd = file(os.path.join(curdir, 'act')) action = act_fd.read() act_fd.close() except: act = "" # Is user authorized to perform this action? 
(auth_code, auth_message) = acc_authorize_action(uid, "submit", authorized_if_no_roles=not isGuestUser(uid), verbose=0, doctype=argd['doctype'], act=action) if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0: # User cannot submit raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) else: # Process the upload and get the response json_response = {} for key, formfields in form.items(): filename = key.replace("[]", "") if hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.abspath(os.path.join(curdir, 'files', str(user_info['uid']), key)) try: assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist while os.path.exists(os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension #-------------# # VIDEO STUFF # #-------------# ## Remove all previous uploads filelist = os.listdir(os.path.split(formfields.file.name)[0]) for afile in filelist: if argd['access'] in afile: os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile)) ## Check if the file is a readable video ## We must exclude all image and audio formats that are readable by ffprobe if (os.path.splitext(filename)[1] in ['jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga', 'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm', 'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf', 'jpe', 'jif', 'jfif', 'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd', 'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac', 'wma', 'm4a', 'wv', 'oga', 'm4a', 'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd'] or not probe(formfields.file.name)): formfields.file.close() raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## We have no "delete" attribute in Python 2.4 if sys.hexversion < 0x2050000: ## We need to rename first and create a dummy file ## Rename the temporary file for the garbage collector new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1] os.rename(formfields.file.name, new_tmp_fullpath) dummy = open(formfields.file.name, "w") dummy.close() formfields.file.close() else: # Mark the NamedTemporatyFile as not to be deleted formfields.file.delete = False formfields.file.close() ## Rename the temporary file for the garbage collector new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + 
CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1] os.rename(formfields.file.name, new_tmp_fullpath) # Write the path to the temp file to a file in STORAGEDIR fp = open(os.path.join(dir_to_open, "filepath"), "w") fp.write(new_tmp_fullpath) fp.close() fp = open(os.path.join(dir_to_open, "filename"), "w") fp.write(filename) fp.close() ## We are going to extract some thumbnails for websubmit ## sample_dir = os.path.join(curdir, 'files', str(user_info['uid']), CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR) try: ## Remove old thumbnails shutil.rmtree(sample_dir) except OSError: register_exception(req=req, alert_admin=False) try: os.makedirs(os.path.join(curdir, 'files', str(user_info['uid']), sample_dir)) except OSError: register_exception(req=req, alert_admin=False) try: extract_frames(input_file=new_tmp_fullpath, output_file=os.path.join(sample_dir, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME), size="600x600", numberof=5) json_response['frames'] = [] for extracted_frame in os.listdir(sample_dir): json_response['frames'].append(extracted_frame) except: ## If the frame extraction fails, something was bad with the video os.remove(new_tmp_fullpath) register_exception(req=req, alert_admin=False) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## Try to detect the aspect. 
if this fails, the video is not readable ## or a wrong file might have been uploaded try: (aspect, width, height) = determine_aspect(new_tmp_fullpath) if aspect: aspx, aspy = aspect.split(':') else: the_gcd = gcd(width, height) aspx = str(width / the_gcd) aspy = str(height / the_gcd) json_response['aspx'] = aspx json_response['aspy'] = aspy except TypeError: ## If the aspect detection completely fails os.remove(new_tmp_fullpath) register_exception(req=req, alert_admin=False) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## Try to extract some metadata from the video container metadata = ffprobe_metadata(new_tmp_fullpath) json_response['meta_title'] = metadata['format'].get('TAG:title') json_response['meta_description'] = metadata['format'].get('TAG:description') json_response['meta_year'] = metadata['format'].get('TAG:year') json_response['meta_author'] = metadata['format'].get('TAG:author') ## Empty file name else: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) ## We found our file, we can break the loop break; # Send our response if CFG_JSON_AVAILABLE: dumped_response = json.dumps(json_response) # store the response in the websubmit directory # this is needed if the submission is not finished and continued later response_dir = os.path.join(curdir, 'files', str(user_info['uid']), "response") try: os.makedirs(response_dir) except OSError: # register_exception(req=req, alert_admin=False) pass fp = open(os.path.join(response_dir, "response"), "w") fp.write(dumped_response) fp.close() return dumped_response def getuploadedfile(self, req, form): """ Stream uploaded files. For the moment, restrict to files in ./curdir/files/uid or ./curdir/icons/uid directory, so that we are sure we stream files only to the user who uploaded them. 
""" argd = wash_urlargd(form, {'indir': (str, None), 'doctype': (str, None), 'access': (str, None), 'icon': (int, 0), 'key': (str, None), 'filename': (str, None), 'nowait': (int, 0)}) if None in argd.values(): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) uid = getUid(req) if argd['icon']: file_path = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access'], 'icons', str(uid), argd['key'], argd['filename'] ) else: file_path = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access'], 'files', str(uid), argd['key'], argd['filename'] ) abs_file_path = os.path.abspath(file_path) if abs_file_path.startswith(CFG_WEBSUBMIT_STORAGEDIR): # Check if file exist. Note that icon might not yet have # been created. if not argd['nowait']: for i in range(5): if os.path.exists(abs_file_path): return stream_file(req, abs_file_path) time.sleep(1) else: if os.path.exists(abs_file_path): return stream_file(req, abs_file_path) # Send error 404 in all other cases raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND) def attachfile(self, req, form): """ Process requests received from CKEditor to upload files. 
If the uploaded file is an image, create an icon version """ if not is_html_text_editor_installed(): return apache.HTTP_NOT_FOUND if not form.has_key('type'): form['type'] = 'File' if not form.has_key('upload') or \ not form['type'] in \ ['File', 'Image', 'Flash', 'Media']: #return apache.HTTP_NOT_FOUND pass filetype = form['type'].lower() uid = getUid(req) # URL where the file can be fetched after upload user_files_path = '%(CFG_SITE_URL)s/submit/getattachedfile/%(uid)s' % \ {'uid': uid, 'CFG_SITE_URL': CFG_SITE_URL, 'filetype': filetype} # Path to directory where uploaded files are saved user_files_absolute_path = '%(CFG_PREFIX)s/var/tmp/attachfile/%(uid)s/%(filetype)s' % \ {'uid': uid, 'CFG_PREFIX': CFG_PREFIX, 'filetype': filetype} try: os.makedirs(user_files_absolute_path) except: pass user_info = collect_user_info(req) (auth_code, auth_message) = acc_authorize_action(user_info, 'attachsubmissionfile') msg = "" if user_info['email'] == 'guest': # User is guest: must login prior to upload msg = 'Please login before uploading file.' elif auth_code: # User cannot submit msg = 'Sorry, you are not allowed to submit files.' 
## elif len(form['upload']) != 1: ## msg = 'Sorry, you must upload one single file' else: # Process the upload and get the response (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function) = \ process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path) if uploaded_file_path: # Create an icon if form.get('type','') == 'Image': try: (icon_path, icon_name) = create_icon( { 'input-file' : uploaded_file_path, 'icon-name' : os.path.splitext(uploaded_file_name)[0], 'icon-file-format' : os.path.splitext(uploaded_file_name)[1][1:] or 'gif', 'multipage-icon' : False, 'multipage-icon-delay' : 100, 'icon-scale' : "300>", # Resize only if width > 300 'verbosity' : 0, }) # Move original file to /original dir, and replace it with icon file original_user_files_absolute_path = os.path.join(user_files_absolute_path, 'original') if not os.path.exists(original_user_files_absolute_path): # Create /original dir if needed os.mkdir(original_user_files_absolute_path) os.rename(uploaded_file_path, original_user_files_absolute_path + os.sep + uploaded_file_name) os.rename(icon_path + os.sep + icon_name, uploaded_file_path) except InvenioWebSubmitIconCreatorError, e: pass user_files_path += '/' + filetype + '/' + uploaded_file_name else: user_files_path = '' if not msg: msg = 'No valid file found' # Send our response send_response(req, msg, user_files_path, callback_function) def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for getting and putting attachments) Eg: /submit/getattachedfile/41336978/image/myfigure.png /submit/attachfile/41336978/image/myfigure.png """ if component == 'getattachedfile' and len(path) > 2: uid = path[0] # uid of the submitter file_type = path[1] # file, image, flash or media (as # defined by CKEditor) if file_type in ['file', 'image', 'flash', 'media']: file_name = '/'.join(path[2:]) # the filename def answer_get(req, form): """Accessing files attached to submission.""" form['file'] = 
file_name form['type'] = file_type form['uid'] = uid return self.getattachedfile(req, form) return answer_get, [] # All other cases: file not found return None, [] def getattachedfile(self, req, form): """ Returns a file uploaded to the submission 'drop box' by the CKEditor. """ argd = wash_urlargd(form, {'file': (str, None), 'type': (str, None), 'uid': (int, 0)}) # Can user view this record, i.e. can user access its # attachments? uid = getUid(req) user_info = collect_user_info(req) if not argd['file'] is None: # Prepare path to file on disk. Normalize the path so that # ../ and other dangerous components are removed. path = os.path.abspath(CFG_PREFIX + '/var/tmp/attachfile/' + \ '/' + str(argd['uid']) + \ '/' + argd['type'] + '/' + argd['file']) # Check that we are really accessing attachements # directory, for the declared record. if path.startswith(CFG_PREFIX + '/var/tmp/attachfile/') and os.path.exists(path): return stream_file(req, path) # Send error 404 in all other cases return(apache.HTTP_NOT_FOUND) def continue_(self, req, form): """ Continue an interrupted submission. """ args = wash_urlargd(form, {'access': (str, ''), 'doctype': (str, '')}) ln = args['ln'] _ = gettext_set_language(ln) access = args['access'] doctype = args['doctype'] if not access or not doctype: return warningMsg(_("Sorry, invalid arguments"), req=req, ln=ln) user_info = collect_user_info(req) email = user_info['email'] res = run_sql("SELECT action, status FROM sbmSUBMISSIONS WHERE id=%s AND email=%s and doctype=%s", (access, email, doctype)) if res: action, status = res[0] if status == 'finished': return warningMsg(_("Note: the requested submission has already been completed"), req=req, ln=ln) redirect_to_url(req, CFG_SITE_SECURE_URL + '/submit/direct?' 
+ urlencode({ 'sub': action + doctype, 'access': access})) return warningMsg(_("Sorry, you don't seem to have initiated a submission with the provided access number"), req=req, ln=ln) def direct(self, req, form): """Directly redirected to an initialized submission.""" args = wash_urlargd(form, {'sub': (str, ''), 'access' : (str, '')}) sub = args['sub'] access = args['access'] ln = args['ln'] _ = gettext_set_language(ln) uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "direct", navmenuid='submit') myQuery = req.args if not sub: return warningMsg(_("Sorry, 'sub' parameter missing..."), req, ln=ln) res = run_sql("SELECT docname,actname FROM sbmIMPLEMENT WHERE subname=%s", (sub,)) if not res: return warningMsg(_("Sorry. Cannot analyse parameter"), req, ln=ln) else: # get document type doctype = res[0][0] # get action name action = res[0][1] # retrieve other parameter values params = dict(form) # find existing access number if not access: # create 'unique' access number pid = os.getpid() now = time.time() - access = "%i_%s" % (now,pid) + access = "%i_%s" % (now, pid) # retrieve 'dir' value res = run_sql ("SELECT dir FROM sbmACTION WHERE sactname=%s", (action,)) dir = res[0][0] mainmenu = req.headers_in.get('referer') params['access'] = access params['act'] = action params['doctype'] = doctype params['startPg'] = '1' params['mainmenu'] = mainmenu params['ln'] = ln params['indir'] = dir url = "%s/submit?%s" % (CFG_SITE_SECURE_URL, urlencode(params)) redirect_to_url(req, url) def sub(self, req, form): """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)""" args = wash_urlargd(form, {'password': (str, '')}) uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../sub/", navmenuid='submit') try: raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. 
"submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"' except DeprecationWarning: register_exception(req=req, alert_admin=True) ln = args['ln'] _ = gettext_set_language(ln) #DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO params = dict(form) password = args['password'] if password: del params['password'] if "@" in password: params['access'], params['sub'] = password.split('@', 1) else: params['sub'] = password else: args = str(req.args).split('@') if len(args) > 1: params = {'sub' : args[-1]} args = '@'.join(args[:-1]) params.update(cgi.parse_qs(args)) else: return warningMsg(_("Sorry, invalid URL..."), req, ln=ln) url = "%s/submit/direct?%s" % (CFG_SITE_SECURE_URL, urlencode(params, doseq=True)) redirect_to_url(req, url) def summary(self, req, form): args = wash_urlargd(form, { 'doctype': (str, ''), 'act': (str, ''), 'access': (str, ''), 'indir': (str, '')}) uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../summary", navmenuid='submit') - t="" + t = "" curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, args['indir'], args['doctype'], args['access']) try: assert(curdir == os.path.abspath(curdir)) except AssertionError: register_exception(req=req, alert_admin=True, prefix='Possible cracking tentative: indir="%s", doctype="%s", access="%s"' % (args['indir'], args['doctype'], args['access'])) return warningMsg("Invalid parameters", req) subname = "%s%s" % (args['act'], args['doctype']) res = run_sql("select sdesc,fidesc,pagenb,level from sbmFIELD where subname=%s " "order by pagenb,fieldnb", (subname,)) nbFields = 0 values = [] for arr in res: if arr[0] != "": val = { 'mandatory' : (arr[3] == 'M'), 'value' : '', 'page' : arr[2], 'name' : arr[0], } - if os.path.exists(os.path.join(curdir, curdir,arr[1])): + if os.path.exists(os.path.join(curdir, curdir, arr[1])): fd = open(os.path.join(curdir, arr[1]),"r") value = fd.read() fd.close() value = value.replace("\n"," ") value = 
value.replace("Select:","") else: value = "" val['value'] = value values.append(val) return websubmit_templates.tmpl_submit_summary( ln = args['ln'], values = values, ) def index(self, req, form): args = wash_urlargd(form, { 'c': (str, CFG_SITE_NAME), 'doctype': (str, ''), 'act': (str, ''), 'startPg': (str, "1"), 'access': (str, ''), 'mainmenu': (str, ''), 'fromdir': (str, ''), 'nextPg': (str, ''), 'nbPg': (str, ''), 'curpage': (str, '1'), 'step': (str, '0'), 'mode': (str, 'U'), }) ## Strip whitespace from beginning and end of doctype and action: args["doctype"] = args["doctype"].strip() args["act"] = args["act"].strip() def _index(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage, step, mode): auth_args = {} if doctype: auth_args['doctype'] = doctype if act: auth_args['act'] = act uid = getUid(req) if CFG_CERN_SITE: ## HACK BEGIN: this is a hack for CMS and ATLAS draft - from invenio.webuser import collect_user_info user_info = collect_user_info(req) if doctype == 'CMSPUB' and act == "" and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch': if isGuestUser(uid): return redirect_to_url(req, "%s/youraccount/login%s" % ( CFG_SITE_SECURE_URL, make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {})) , norobot=True) if 'cms-publication-committee-chair [CERN]' not in user_info['group']: return page_not_authorized(req, "../submit", text="In order to access this submission interface you need to be member of the CMS Publication Committee Chair.", navmenuid='submit') elif doctype == 'ATLPUB' and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch': if isGuestUser(uid): return redirect_to_url(req, "%s/youraccount/login%s" % ( CFG_SITE_SECURE_URL, make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {})) , norobot=True) if 'atlas-gen [CERN]' not in 
user_info['group']: return page_not_authorized(req, "../submit", text="In order to access this submission interface you need to be member of ATLAS.", navmenuid='submit') ## HACK END if doctype == "": - catalogues_text, at_least_one_submission_authorized, submission_exists= makeCataloguesTable(req, ln=CFG_SITE_LANG) + catalogues_text, at_least_one_submission_authorized, submission_exists = makeCataloguesTable(req, ln=CFG_SITE_LANG) if not at_least_one_submission_authorized and submission_exists: if isGuestUser(uid): return redirect_to_url(req, "%s/youraccount/login%s" % ( CFG_SITE_SECURE_URL, make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {})) , norobot=True) else: return page_not_authorized(req, "../submit", uid=uid, navmenuid='submit') - return home(req,catalogues_text, c,ln) + return home(req, catalogues_text, c, ln) elif act == "": - return action(req,c,ln,doctype) + return action(req, c, ln, doctype) elif int(step)==0: return interface(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage) else: - return endaction(req, c, ln, doctype, act, startPg, access,mainmenu, fromdir, nextPg, nbPg, curpage, step, mode) + return endaction(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage, step, mode) return _index(req, **args) # Answer to both /submit/ and /submit __call__ = index def errorMsg(title, req, c=None, ln=CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) if c is None: c = CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME) return page(title = _("Error"), body = create_error_box(req, title=str(title), verbose=0, ln=ln), description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='submit') def warningMsg(title, req, c=None, ln=CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) if c is None: c = CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME) return 
page(title = _("Warning"), body = title, description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='submit') def print_warning(msg, type='', prologue='
    ', epilogue='
    '): """Prints warning message and flushes output.""" if msg: return websubmit_templates.tmpl_print_warning( msg = msg, type = type, prologue = prologue, epilogue = epilogue, ) else: return '' ## def retrieve_most_recent_attached_file(file_path): ## """ ## Retrieve the latest file that has been uploaded with the ## CKEditor. This is the only way to retrieve files that the ## CKEditor has renamed after the upload. ## Eg: 'prefix/image.jpg' was uploaded but did already ## exist. CKEditor silently renamed it to 'prefix/image(1).jpg': ## >>> retrieve_most_recent_attached_file('prefix/image.jpg') ## 'prefix/image(1).jpg' ## """ ## (base_path, filename) = os.path.split(file_path) ## base_name = os.path.splitext(filename)[0] ## file_ext = os.path.splitext(filename)[1][1:] ## most_recent_filename = filename ## i = 0 ## while True: ## i += 1 ## possible_filename = "%s(%d).%s" % \ ## (base_name, i, file_ext) ## if os.path.exists(base_path + os.sep + possible_filename): ## most_recent_filename = possible_filename ## else: ## break ## return os.path.join(base_path, most_recent_filename)